diff options
768 files changed, 28193 insertions, 14141 deletions
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c index 1d2c010bae1..e7823ffb1ca 100644 --- a/Documentation/auxdisplay/cfag12864b-example.c +++ b/Documentation/auxdisplay/cfag12864b-example.c @@ -194,7 +194,6 @@ static void cfag12864b_blit(void) */ #include <stdio.h> -#include <string.h> #define EXAMPLES 6 diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 6eb1a97e88c..455d4e6d346 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0: # echo 0 > tasks +2.3 Mounting hierarchies by name +-------------------------------- + +Passing the name=<x> option when mounting a cgroups hierarchy +associates the given name with the hierarchy. This can be used when +mounting a pre-existing hierarchy, in order to refer to it by name +rather than by its set of active subsystems. Each hierarchy is either +nameless, or has a unique name. + +The name should match [\w.-]+ + +When passing a name=<x> option for a new hierarchy, you need to +specify subsystems manually; the legacy behaviour of mounting all +subsystems when none are explicitly specified is not supported when +you give a subsystem a name. + +The name of the subsystem appears as part of the hierarchy description +in /proc/mounts and /proc/<pid>/cgroups. + + 3. Kernel API ============= @@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be called multiple times against a cgroup. int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *task) + struct task_struct *task, bool threadgroup) (cgroup_mutex held by caller) Called prior to moving a task into a cgroup; if the subsystem @@ -509,14 +529,20 @@ returns an error, this will abort the attach operation. If a NULL task is passed, then a successful result indicates that *any* unspecified task can be moved into the cgroup. Note that this isn't called on a fork. If this method returns 0 (success) then this should -remain valid while the caller holds cgroup_mutex. +remain valid while the caller holds cgroup_mutex. If threadgroup is +true, then a successful result indicates that all threads in the given +thread's threadgroup can be moved together. void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task) + struct cgroup *old_cgrp, struct task_struct *task, + bool threadgroup) (cgroup_mutex held by caller) Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. +If threadgroup is true, the subsystem should take care of all threads +in the specified thread's threadgroup. Currently does not support any +subsystem that might need the old_cgrp for every thread in the group. void fork(struct cgroup_subsy *ss, struct task_struct *task) diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 23d1262c077..b871f2552b4 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that pages that are selected for reclaiming come from the per cgroup LRU list. +NOTE: Reclaim does not work for the root cgroup, since we cannot set any +limits on the root cgroup. + 2. Locking The memory controller uses the following hierarchy @@ -210,6 +213,7 @@ We can alter the memory limit: NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, mega or gigabytes. NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). +NOTE: We cannot set limits on the root cgroup any more. # cat /cgroups/0/memory.limit_in_bytes 4194304 @@ -375,7 +379,42 @@ cgroups created below it. NOTE2: This feature can be enabled/disabled per subtree. -7. TODO +7. Soft limits + +Soft limits allow for greater sharing of memory. The idea behind soft limits +is to allow control groups to use as much of the memory as needed, provided + +a. There is no memory contention +b. They do not exceed their hard limit + +When the system detects memory contention or low memory control groups +are pushed back to their soft limits. If the soft limit of each control +group is very high, they are pushed back as much as possible to make +sure that one control group does not starve the others of memory. + +Please note that soft limits is a best effort feature, it comes with +no guarantees, but it does its best to make sure that when memory is +heavily contended for, memory is allocated based on the soft limit +hints/setup. Currently soft limit based reclaim is setup such that +it gets invoked from balance_pgdat (kswapd). + +7.1 Interface + +Soft limits can be setup by using the following commands (in this example we +assume a soft limit of 256 megabytes) + +# echo 256M > memory.soft_limit_in_bytes + +If we want to change this to 1G, we can at any time use + +# echo 1G > memory.soft_limit_in_bytes + +NOTE1: Soft limits take effect over a long period of time, since they involve + reclaiming memory for balancing between memory cgroups +NOTE2: It is recommended to set the soft limit always below the hard limit, + otherwise the hard limit will take precedence. + +8. TODO 1. Add support for accounting huge pages (as a separate controller) 2. Make per-cgroup scanner reclaim not-shared pages first diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 9f59fcbf5d8..ba046b8fa92 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -54,20 +54,23 @@ features surfaced as a result: 3.1 General format of the API: struct dma_async_tx_descriptor * -async_<operation>(<op specific parameters>, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *dependency, - dma_async_tx_callback callback_routine, - void *callback_parameter); +async_<operation>(<op specific parameters>, struct async_submit ctl *submit) 3.2 Supported operations: -memcpy - memory copy between a source and a destination buffer -memset - fill a destination buffer with a byte value -xor - xor a series of source buffers and write the result to a - destination buffer -xor_zero_sum - xor a series of source buffers and set a flag if the - result is zero. The implementation attempts to prevent - writes to memory +memcpy - memory copy between a source and a destination buffer +memset - fill a destination buffer with a byte value +xor - xor a series of source buffers and write the result to a + destination buffer +xor_val - xor a series of source buffers and set a flag if the + result is zero. The implementation attempts to prevent + writes to memory +pq - generate the p+q (raid6 syndrome) from a series of source buffers +pq_val - validate that a p and or q buffer are in sync with a given series of + sources +datap - (raid6_datap_recov) recover a raid6 data block and the p block + from the given sources +2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation @@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to recycle (or free) the descriptor. A descriptor can be acked by one of the following methods: 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted -2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent - descriptor of a new operation. +2/ submitting an unacknowledged descriptor as a dependency to another + async_tx call will implicitly set the acknowledged state. 3/ calling async_tx_ack() on the descriptor. 3.4 When does the operation execute? @@ -119,30 +122,42 @@ of an operation. Perform a xor->copy->xor operation where each operation depends on the result from the previous operation: -void complete_xor_copy_xor(void *param) +void callback(void *param) { - printk("complete\n"); + struct completion *cmp = param; + + complete(cmp); } -int run_xor_copy_xor(struct page **xor_srcs, - int xor_src_cnt, - struct page *xor_dest, - size_t xor_len, - struct page *copy_src, - struct page *copy_dest, - size_t copy_len) +void run_xor_copy_xor(struct page **xor_srcs, + int xor_src_cnt, + struct page *xor_dest, + size_t xor_len, + struct page *copy_src, + struct page *copy_dest, + size_t copy_len) { struct dma_async_tx_descriptor *tx; + addr_conv_t addr_conv[xor_src_cnt]; + struct async_submit_ctl submit; + addr_conv_t addr_conv[NDISKS]; + struct completion cmp; + + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL, + addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit) - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); - tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, - tx, complete_xor_copy_xor, NULL); + submit->depend_tx = tx; + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit); + + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, + callback, &cmp, addr_conv); + tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); async_tx_issue_pending_all(); + + wait_for_completion(&cmp); } See include/linux/async_tx.h for more information on the flags. See the diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index fa75220f8d3..89a47b5aff0 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -354,14 +354,6 @@ Who: Krzysztof Piotr Oledzki <ole@ans.pl> --------------------------- -What: fscher and fscpos drivers -When: June 2009 -Why: Deprecated by the new fschmd driver. -Who: Hans de Goede <hdegoede@redhat.com> - Jean Delvare <khali@linux-fr.org> - ---------------------------- - What: sysfs ui for changing p4-clockmod parameters When: September 2009 Why: See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt index 6208f55c44c..57e0b80a527 100644 --- a/Documentation/filesystems/9p.txt +++ b/Documentation/filesystems/9p.txt @@ -18,11 +18,11 @@ the 9p client is available in the form of a USENIX paper: Other applications are described in the following papers: * XCPU & Clustering - http://www.xcpu.org/xcpu-talk.pdf + http://xcpu.org/papers/xcpu-talk.pdf * KVMFS: control file system for KVM - http://www.xcpu.org/kvmfs.pdf - * CellFS: A New ProgrammingModel for the Cell BE - http://www.xcpu.org/cellfs-talk.pdf + http://xcpu.org/papers/kvmfs.pdf + * CellFS: A New Programming Model for the Cell BE + http://xcpu.org/papers/cellfs-talk.pdf * PROSE I/O: Using 9p to enable Application Partitions http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf @@ -48,6 +48,7 @@ OPTIONS (see rfdno and wfdno) virtio - connect to the next virtio channel available (from lguest or KVM with trans_virtio module) + rdma - connect to a specified RDMA channel uname=name user name to attempt mount as on the remote server. The server may override or ignore this value. Certain user @@ -59,16 +60,22 @@ OPTIONS cache=mode specifies a caching policy. By default, no caches are used. loose = no attempts are made at consistency, intended for exclusive, read-only mounts + fscache = use FS-Cache for a persistent, read-only + cache backend. debug=n specifies debug level. The debug level is a bitmask. - 0x01 = display verbose error messages - 0x02 = developer debug (DEBUG_CURRENT) - 0x04 = display 9p trace - 0x08 = display VFS trace - 0x10 = display Marshalling debug - 0x20 = display RPC debug - 0x40 = display transport debug - 0x80 = display allocation debug + 0x01 = display verbose error messages + 0x02 = developer debug (DEBUG_CURRENT) + 0x04 = display 9p trace + 0x08 = display VFS trace + 0x10 = display Marshalling debug + 0x20 = display RPC debug + 0x40 = display transport debug + 0x80 = display allocation debug + 0x100 = display protocol message debug + 0x200 = display Fid debug + 0x400 = display packet debug + 0x800 = display fscache tracing debug rfdno=n the file descriptor for reading with trans=fd @@ -100,6 +107,10 @@ OPTIONS any = v9fs does single attach and performs all operations as one user + cachetag cache tag to use the specified persistent cache. + cache tags for existing cache sessions can be listed at + /sys/fs/9p/caches. (applies only to cache=fscache) + RESOURCES ========= @@ -118,7 +129,7 @@ and export. A Linux version of the 9p server is now maintained under the npfs project on sourceforge (http://sourceforge.net/projects/npfs). The currently maintained version is the single-threaded version of the server (named spfs) -available from the same CVS repository. +available from the same SVN repository. There are user and developer mailing lists available through the v9fs project on sourceforge (http://sourceforge.net/projects/v9fs). @@ -126,7 +137,8 @@ on sourceforge (http://sourceforge.net/projects/v9fs). A stand-alone version of the module (which should build for any 2.6 kernel) is available via (http://github.com/ericvh/9p-sac/tree/master) -News and other information is maintained on SWiK (http://swik.net/v9fs). +News and other information is maintained on SWiK (http://swik.net/v9fs) +and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php). Bug reports may be issued through the kernel.org bugzilla (http://bugzilla.kernel.org) diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 736540045dc..23a181074f9 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt @@ -4,7 +4,7 @@ Shared Subtrees Contents: 1) Overview 2) Features - 3) smount command + 3) Setting mount states 4) Use-case 5) Detailed semantics 6) Quiz @@ -41,14 +41,14 @@ replicas continue to be exactly same. Here is an example: - Lets say /mnt has a mount that is shared. + Let's say /mnt has a mount that is shared. mount --make-shared /mnt - note: mount command does not yet support the --make-shared flag. - I have included a small C program which does the same by executing - 'smount /mnt shared' + Note: mount(8) command now supports the --make-shared flag, + so the sample 'smount' program is no longer needed and has been + removed. - #mount --bind /mnt /tmp + # mount --bind /mnt /tmp The above command replicates the mount at /mnt to the mountpoint /tmp and the contents of both the mounts remain identical. @@ -58,8 +58,8 @@ replicas continue to be exactly same. #ls /tmp a b c - Now lets say we mount a device at /tmp/a - #mount /dev/sd0 /tmp/a + Now let's say we mount a device at /tmp/a + # mount /dev/sd0 /tmp/a #ls /tmp/a t1 t2 t2 @@ -80,21 +80,20 @@ replicas continue to be exactly same. Here is an example: - Lets say /mnt has a mount which is shared. - #mount --make-shared /mnt + Let's say /mnt has a mount which is shared. + # mount --make-shared /mnt - Lets bind mount /mnt to /tmp - #mount --bind /mnt /tmp + Let's bind mount /mnt to /tmp + # mount --bind /mnt /tmp the new mount at /tmp becomes a shared mount and it is a replica of the mount at /mnt. - Now lets make the mount at /tmp; a slave of /mnt - #mount --make-slave /tmp - [or smount /tmp slave] + Now let's make the mount at /tmp; a slave of /mnt + # mount --make-slave /tmp - lets mount /dev/sd0 on /mnt/a - #mount /dev/sd0 /mnt/a + let's mount /dev/sd0 on /mnt/a + # mount /dev/sd0 /mnt/a #ls /mnt/a t1 t2 t3 @@ -104,9 +103,9 @@ replicas continue to be exactly same. Note the mount event has propagated to the mount at /tmp - However lets see what happens if we mount something on the mount at /tmp + However let's see what happens if we mount something on the mount at /tmp - #mount /dev/sd1 /tmp/b + # mount /dev/sd1 /tmp/b #ls /tmp/b s1 s2 s3 @@ -124,12 +123,11 @@ replicas continue to be exactly same. 2d) A unbindable mount is a unbindable private mount - lets say we have a mount at /mnt and we make is unbindable + let's say we have a mount at /mnt and we make is unbindable - #mount --make-unbindable /mnt - [ smount /mnt unbindable ] + # mount --make-unbindable /mnt - Lets try to bind mount this mount somewhere else. + Let's try to bind mount this mount somewhere else. # mount --bind /mnt /tmp mount: wrong fs type, bad option, bad superblock on /mnt, or too many mounted file systems @@ -137,149 +135,15 @@ replicas continue to be exactly same. Binding a unbindable mount is a invalid operation. -3) smount command +3) Setting mount states - Currently the mount command is not aware of shared subtree features. - Work is in progress to add the support in mount ( util-linux package ). - Till then use the following program. + The mount command (util-linux package) can be used to set mount + states: - ------------------------------------------------------------------------ - // - //this code was developed my Miklos Szeredi <miklos@szeredi.hu> - //and modified by Ram Pai <linuxram@us.ibm.com> - // sample usage: - // smount /tmp shared - // - #include <stdio.h> - #include <stdlib.h> - #include <unistd.h> - #include <string.h> - #include <sys/mount.h> - #include <sys/fsuid.h> - - #ifndef MS_REC - #define MS_REC 0x4000 /* 16384: Recursive loopback */ - #endif - - #ifndef MS_SHARED - #define MS_SHARED 1<<20 /* Shared */ - #endif - - #ifndef MS_PRIVATE - #define MS_PRIVATE 1<<18 /* Private */ - #endif - - #ifndef MS_SLAVE - #define MS_SLAVE 1<<19 /* Slave */ - #endif - - #ifndef MS_UNBINDABLE - #define MS_UNBINDABLE 1<<17 /* Unbindable */ - #endif - - int main(int argc, char *argv[]) - { - int type; - if(argc != 3) { - fprintf(stderr, "usage: %s dir " - "<rshared|rslave|rprivate|runbindable|shared|slave" - "|private|unbindable>\n" , argv[0]); - return 1; - } - - fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]); - - if (strcmp(argv[2],"rshared")==0) - type=(MS_SHARED|MS_REC); - else if (strcmp(argv[2],"rslave")==0) - type=(MS_SLAVE|MS_REC); - else if (strcmp(argv[2],"rprivate")==0) - type=(MS_PRIVATE|MS_REC); - else if (strcmp(argv[2],"runbindable")==0) - type=(MS_UNBINDABLE|MS_REC); - else if (strcmp(argv[2],"shared")==0) - type=MS_SHARED; - else if (strcmp(argv[2],"slave")==0) - type=MS_SLAVE; - else if (strcmp(argv[2],"private")==0) - type=MS_PRIVATE; - else if (strcmp(argv[2],"unbindable")==0) - type=MS_UNBINDABLE; - else { - fprintf(stderr, "invalid operation: %s\n", argv[2]); - return 1; - } - setfsuid(getuid()); - - if(mount("", argv[1], "dontcare", type, "") == -1) { - perror("mount"); - return 1; - } - return 0; - } - ----------------------------------------------------------------------- - - Copy the above code snippet into smount.c - gcc -o smount smount.c - - - (i) To mark all the mounts under /mnt as shared execute the following - command: - - smount /mnt rshared - the corresponding syntax planned for mount command is - mount --make-rshared /mnt - - just to mark a mount /mnt as shared, execute the following - command: - smount /mnt shared - the corresponding syntax planned for mount command is - mount --make-shared /mnt - - (ii) To mark all the shared mounts under /mnt as slave execute the - following - - command: - smount /mnt rslave - the corresponding syntax planned for mount command is - mount --make-rslave /mnt - - just to mark a mount /mnt as slave, execute the following - command: - smount /mnt slave - the corresponding syntax planned for mount command is - mount --make-slave /mnt - - (iii) To mark all the mounts under /mnt as private execute the - following command: - - smount /mnt rprivate - the corresponding syntax planned for mount command is - mount --make-rprivate /mnt - - just to mark a mount /mnt as private, execute the following - command: - smount /mnt private - the corresponding syntax planned for mount command is - mount --make-private /mnt - - NOTE: by default all the mounts are created as private. But if - you want to change some shared/slave/unbindable mount as - private at a later point in time, this command can help. - - (iv) To mark all the mounts under /mnt as unbindable execute the - following - - command: - smount /mnt runbindable - the corresponding syntax planned for mount command is - mount --make-runbindable /mnt - - just to mark a mount /mnt as unbindable, execute the following - command: - smount /mnt unbindable - the corresponding syntax planned for mount command is - mount --make-unbindable /mnt + mount --make-shared mountpoint + mount --make-slave mountpoint + mount --make-private mountpoint + mount --make-unbindable mountpoint 4) Use cases @@ -350,7 +214,7 @@ replicas continue to be exactly same. mount --rbind / /view/v3 mount --rbind / /view/v4 - and if /usr has a versioning filesystem mounted, than that + and if /usr has a versioning filesystem mounted, then that mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and /view/v4/usr too @@ -390,7 +254,7 @@ replicas continue to be exactly same. For example: mount --make-shared /mnt - mount --bin /mnt /tmp + mount --bind /mnt /tmp The mount at /mnt and that at /tmp are both shared and belong to the same peer group. Anything mounted or unmounted under @@ -558,7 +422,7 @@ replicas continue to be exactly same. then the subtree under the unbindable mount is pruned in the new location. - eg: lets say we have the following mount tree. + eg: let's say we have the following mount tree. A / \ @@ -566,7 +430,7 @@ replicas continue to be exactly same. / \ / \ D E F G - Lets say all the mount except the mount C in the tree are + Let's say all the mount except the mount C in the tree are of a type other than unbindable. If this tree is rbound to say Z @@ -683,13 +547,13 @@ replicas continue to be exactly same. 'b' on mounts that receive propagation from mount 'B' and does not have sub-mounts within them are unmounted. - Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to + Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to each other. - lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount + let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount 'B1', 'B2' and 'B3' respectively. - lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on + let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on mount 'B1', 'B2' and 'B3' respectively. if 'C1' is unmounted, all the mounts that are most-recently-mounted on @@ -710,7 +574,7 @@ replicas continue to be exactly same. A cloned namespace contains all the mounts as that of the parent namespace. - Lets say 'A' and 'B' are the corresponding mounts in the parent and the + Let's say 'A' and 'B' are the corresponding mounts in the parent and the child namespace. If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to @@ -759,11 +623,11 @@ replicas continue to be exactly same. mount --make-slave /mnt At this point we have the first mount at /tmp and - its root dentry is 1. Lets call this mount 'A' + its root dentry is 1. Let's call this mount 'A' And then we have a second mount at /tmp1 with root - dentry 2. Lets call this mount 'B' + dentry 2. Let's call this mount 'B' Next we have a third mount at /mnt with root dentry - mnt. Lets call this mount 'C' + mnt. Let's call this mount 'C' 'B' is the slave of 'A' and 'C' is a slave of 'B' A -> B -> C @@ -794,7 +658,7 @@ replicas continue to be exactly same. Q3 Why is unbindable mount needed? - Lets say we want to replicate the mount tree at multiple + Let's say we want to replicate the mount tree at multiple locations within the same subtree. if one rbind mounts a tree within the same subtree 'n' times @@ -803,7 +667,7 @@ replicas continue to be exactly same. mounts. Here is a example. step 1: - lets say the root tree has just two directories with + let's say the root tree has just two directories with one vfsmount. root / \ @@ -875,7 +739,7 @@ replicas continue to be exactly same. Unclonable mounts come in handy here. step 1: - lets say the root tree has just two directories with + let's say the root tree has just two directories with one vfsmount. root / \ diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index f49eecf2e57..623f094c9d8 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -536,6 +536,7 @@ struct address_space_operations { /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); + int (*error_remove_page) (struct mapping *mapping, struct page *page); }; writepage: called by the VM to write a dirty page to backing store. @@ -694,6 +695,12 @@ struct address_space_operations { prevent redirtying the page, it is kept locked during the whole operation. + error_remove_page: normally set to generic_error_remove_page if truncation + is ok for this address space. Used for memory failure handling. + Setting this implies you deal with pages going away under you, + unless you have them locked or reference counts increased. + + The File Object =============== diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp index dbbe6c7025b..92267b62db5 100644 --- a/Documentation/hwmon/coretemp +++ b/Documentation/hwmon/coretemp @@ -4,7 +4,9 @@ Kernel driver coretemp Supported chips: * All Intel Core family Prefix: 'coretemp' - CPUID: family 0x6, models 0xe, 0xf, 0x16, 0x17 + CPUID: family 0x6, models 0xe (Pentium M DC), 0xf (Core 2 DC 65nm), + 0x16 (Core 2 SC 65nm), 0x17 (Penryn 45nm), + 0x1a (Nehalem), 0x1c (Atom), 0x1e (Lynnfield) Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3A: System Programming Guide http://softwarecommunity.intel.com/Wiki/Mobility/720.htm diff --git a/Documentation/hwmon/fscher b/Documentation/hwmon/fscher deleted file mode 100644 index 64031659aff..00000000000 --- a/Documentation/hwmon/fscher +++ /dev/null @@ -1,169 +0,0 @@ -Kernel driver fscher -==================== - -Supported chips: - * Fujitsu-Siemens Hermes chip - Prefix: 'fscher' - Addresses scanned: I2C 0x73 - -Authors: - Reinhard Nissl <rnissl@gmx.de> based on work - from Hermann Jung <hej@odn.de>, - Frodo Looijaard <frodol@dds.nl>, - Philip Edelbrock <phil@netroedge.com> - -Description ------------ - -This driver implements support for the Fujitsu-Siemens Hermes chip. It is -described in the 'Register Set Specification BMC Hermes based Systemboard' -from Fujitsu-Siemens. - -The Hermes chip implements a hardware-based system management, e.g. for -controlling fan speed and core voltage. There is also a watchdog counter on -the chip which can trigger an alarm and even shut the system down. - -The chip provides three temperature values (CPU, motherboard and -auxiliary), three voltage values (+12V, +5V and battery) and three fans -(power supply, CPU and auxiliary). - -Temperatures are measured in degrees Celsius. The resolution is 1 degree. - -Fan rotation speeds are reported in RPM (rotations per minute). The value -can be divided by a programmable divider (1, 2 or 4) which is stored on -the chip. - -Voltage sensors (also known as "in" sensors) report their values in volts. - -All values are reported as final values from the driver. There is no need -for further calculations. - - -Detailed description --------------------- - -Below you'll find a single line description of all the bit values. With -this information, you're able to decode e. g. alarms, wdog, etc. To make -use of the watchdog, you'll need to set the watchdog time and enable the -watchdog. After that it is necessary to restart the watchdog time within -the specified period of time, or a system reset will occur. - -* revision - READING & 0xff = 0x??: HERMES revision identification - -* alarms - READING & 0x80 = 0x80: CPU throttling active - READING & 0x80 = 0x00: CPU running at full speed - - READING & 0x10 = 0x10: software event (see control:1) - READING & 0x10 = 0x00: no software event - - READING & 0x08 = 0x08: watchdog event (see wdog:2) - READING & 0x08 = 0x00: no watchdog event - - READING & 0x02 = 0x02: thermal event (see temp*:1) - READING & 0x02 = 0x00: no thermal event - - READING & 0x01 = 0x01: fan event (see fan*:1) - READING & 0x01 = 0x00: no fan event - - READING & 0x13 ! 0x00: ALERT LED is flashing - -* control - READING & 0x01 = 0x01: software event - READING & 0x01 = 0x00: no software event - - WRITING & 0x01 = 0x01: set software event - WRITING & 0x01 = 0x00: clear software event - -* watchdog_control - READING & 0x80 = 0x80: power off on watchdog event while thermal event - READING & 0x80 = 0x00: watchdog power off disabled (just system reset enabled) - - READING & 0x40 = 0x40: watchdog timebase 60 seconds (see also wdog:1) - READING & 0x40 = 0x00: watchdog timebase 2 seconds - - READING & 0x10 = 0x10: watchdog enabled - READING & 0x10 = 0x00: watchdog disabled - - WRITING & 0x80 = 0x80: enable "power off on watchdog event while thermal event" - WRITING & 0x80 = 0x00: disable "power off on watchdog event while thermal event" - - WRITING & 0x40 = 0x40: set watchdog timebase to 60 seconds - WRITING & 0x40 = 0x00: set watchdog timebase to 2 seconds - - WRITING & 0x20 = 0x20: disable watchdog - - WRITING & 0x10 = 0x10: enable watchdog / restart watchdog time - -* watchdog_state - READING & 0x02 = 0x02: watchdog system reset occurred - READING & 0x02 = 0x00: no watchdog system reset occurred - - WRITING & 0x02 = 0x02: clear watchdog event - -* watchdog_preset - READING & 0xff = 0x??: configured watch dog time in units (see wdog:3 0x40) - - WRITING & 0xff = 0x??: configure watch dog time in units - -* in* (0: +5V, 1: +12V, 2: onboard 3V battery) - READING: actual voltage value - -* temp*_status (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor) - READING & 0x02 = 0x02: thermal event (overtemperature) - READING & 0x02 = 0x00: no thermal event - - READING & 0x01 = 0x01: sensor is working - READING & 0x01 = 0x00: sensor is faulty - - WRITING & 0x02 = 0x02: clear thermal event - -* temp*_input (1: CPU sensor, 2: onboard sensor, 3: auxiliary sensor) - READING: actual temperature value - -* fan*_status (1: power supply fan, 2: CPU fan, 3: auxiliary fan) - READING & 0x04 = 0x04: fan event (fan fault) - READING & 0x04 = 0x00: no fan event - - WRITING & 0x04 = 0x04: clear fan event - -* fan*_div (1: power supply fan, 2: CPU fan, 3: auxiliary fan) - Divisors 2,4 and 8 are supported, both for reading and writing - -* fan*_pwm (1: power supply fan, 2: CPU fan, 3: auxiliary fan) - READING & 0xff = 0x00: fan may be switched off - READING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V) - READING & 0xff = 0xff: fan must run at maximum speed (supply: 12V) - READING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V) - - WRITING & 0xff = 0x00: fan may be switched off - WRITING & 0xff = 0x01: fan must run at least at minimum speed (supply: 6V) - WRITING & 0xff = 0xff: fan must run at maximum speed (supply: 12V) - WRITING & 0xff = 0x??: fan must run at least at given speed (supply: 6V..12V) - -* fan*_input (1: power supply fan, 2: CPU fan, 3: auxiliary fan) - READING: actual RPM value - - -Limitations ------------ - -* Measuring fan speed -It seems that the chip counts "ripples" (typical fans produce 2 ripples per -rotation while VERAX fans produce 18) in a 9-bit register. This register is -read out every second, then the ripple prescaler (2, 4 or 8) is applied and -the result is stored in the 8 bit output register. Due to the limitation of -the counting register to 9 bits, it is impossible to measure a VERAX fan -properly (even with a prescaler of 8). At its maximum speed of 3500 RPM the -fan produces 1080 ripples per second which causes the counting register to -overflow twice, leading to only 186 RPM. - -* Measuring input voltages -in2 ("battery") reports the voltage of the onboard lithium battery and not -+3.3V from the power supply. - -* Undocumented features -Fujitsu-Siemens Computers has not documented all features of the chip so -far. Their software, System Guard, shows that there are a still some -features which cannot be controlled by this implementation. diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index aafca0a8f66..947374977ca 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -135,6 +135,7 @@ Code Seq# Include File Comments <http://mikonos.dia.unisa.it/tcfs> 'l' 40-7F linux/udf_fs_i.h in development: <http://sourceforge.net/projects/linux-udf/> +'m' 00-09 linux/mmtimer.h 'm' all linux/mtio.h conflict! 'm' all linux/soundcard.h conflict! 'm' all linux/synclink.h conflict! diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index f3355b6812d..bb3bf38f03d 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -65,6 +65,22 @@ INSTALL_PATH INSTALL_PATH specifies where to place the updated kernel and system map images. Default is /boot, but you can set it to other values. +INSTALLKERNEL +-------------------------------------------------- +Install script called when using "make install". +The default name is "installkernel". + +The script will be called with the following arguments: + $1 - kernel version + $2 - kernel image file + $3 - kernel map file + $4 - default install path (use root directory if blank) + +The implmentation of "make install" is architecture specific +and it may differ from the above. + +INSTALLKERNEL is provided to enable the possibility to +specify a custom installer when cross compiling a kernel. MODLIB -------------------------------------------------- diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index d76cfd8712e..71c602d6168 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -18,6 +18,7 @@ This document describes the Linux kernel Makefiles. --- 3.9 Dependency tracking --- 3.10 Special Rules --- 3.11 $(CC) support functions + --- 3.12 $(LD) support functions === 4 Host Program support --- 4.1 Simple Host Program @@ -435,14 +436,14 @@ more details, with real examples. The second argument is optional, and if supplied will be used if first argument is not supported. - ld-option - ld-option is used to check if $(CC) when used to link object files + cc-ldoption + cc-ldoption is used to check if $(CC) when used to link object files supports the given option. An optional second option may be specified if first option are not supported. Example: #arch/i386/kernel/Makefile - vsyscall-flags += $(call ld-option, -Wl$(comma)--hash-style=sysv) + vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) In the above example, vsyscall-flags will be assigned the option -Wl$(comma)--hash-style=sysv if it is supported by $(CC). @@ -570,6 +571,19 @@ more details, with real examples. endif endif +--- 3.12 $(LD) support functions + + ld-option + ld-option is used to check if $(LD) supports the supplied option. + ld-option takes two options as arguments. + The second argument is an optional option that can be used if the + first option is not supported by $(LD). + + Example: + #Makefile + LDFLAGS_vmlinux += $(call really-ld-option, -X) + + === 4 Host Program support Kbuild supports building executables on the host for use during the diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 1458448436c..62682500878 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots of error messages about running out of file handles, you might want to increase this limit. -The three values in file-nr denote the number of allocated -file handles, the number of unused file handles and the maximum -number of file handles. When the allocated file handles come -close to the maximum, but the number of unused file handles is -significantly greater than 0, you've encountered a peak in your -usage of file handles and you don't need to increase the maximum. - +Historically, the three values in file-nr denoted the number of +allocated file handles, the number of allocated but unused file +handles, and the maximum number of file handles. Linux 2.6 always +reports 0 as the number of free file handles -- this is not an +error, it just means that the number of allocated file handles +exactly matches the number of used file handles. + +Attempts to allocate more file descriptors than file-max are +reported with printk, look for "VFS: file-max limit <number> +reached". ============================================================== nr_open: diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index b3d8b492274..a028b92001e 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -22,6 +22,7 @@ show up in /proc/sys/kernel: - callhome [ S390 only ] - auto_msgmni - core_pattern +- core_pipe_limit - core_uses_pid - ctrl-alt-del - dentry-state @@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name. ============================================================== +core_pipe_limit: + +This sysctl is only applicable when core_pattern is configured to pipe core +files to user space helper a (when the first character of core_pattern is a '|', +see above). When collecting cores via a pipe to an application, it is +occasionally usefull for the collecting application to gather data about the +crashing process from its /proc/pid directory. In order to do this safely, the +kernel must wait for the collecting process to exit, so as not to remove the +crashing processes proc files prematurely. This in turn creates the possibility +that a misbehaving userspace collecting process can block the reaping of a +crashed process simply by never exiting. This sysctl defends against that. It +defines how many concurrent crashing processes may be piped to user space +applications in parallel. If this value is exceeded, then those crashing +processes above that value are noted via the kernel log and their cores are +skipped. 0 is a special value, indicating that unlimited processes may be +captured in parallel, but that no waiting will take place (i.e. the collecting +process is not guaranteed access to /proc/<crahing pid>/). This value defaults +to 0. + +============================================================== + core_uses_pid: The default coredump filename is "core". By setting diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index e6fb1ec2744..a6e360d2055 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm: - legacy_va_layout - lowmem_reserve_ratio - max_map_count +- memory_failure_early_kill +- memory_failure_recovery - min_free_kbytes - min_slab_ratio - min_unmapped_ratio @@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm: - vfs_cache_pressure - zone_reclaim_mode - ============================================================== block_dump @@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation. The default value is 65536. +============================================================= + +memory_failure_early_kill: + +Control how to kill processes when uncorrected memory error (typically +a 2bit error in a memory module) is detected in the background by hardware +that cannot be handled by the kernel. In some cases (like the page +still having a valid copy on disk) the kernel will handle the failure +transparently without affecting any applications. But if there is +no other uptodate copy of the data it will kill to prevent any data +corruptions from propagating. + +1: Kill all processes that have the corrupted and not reloadable page mapped +as soon as the corruption is detected. Note this is not supported +for a few types of pages, like kernel internally allocated data or +the swap cache, but works for the majority of user pages. + +0: Only unmap the corrupted page from all processes and only kill a process +who tries to access it. + +The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can +handle this if they want to. + +This is only active on architectures/platforms with advanced machine +check handling and depends on the hardware capabilities. + +Applications can override this setting individually with the PR_MCE_KILL prctl + +============================================================== + +memory_failure_recovery + +Enable memory failure recovery (when supported by the platform) + +1: Attempt recovery. + +0: Always panic on a memory failure. + ============================================================== min_free_kbytes: diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore index 33e8a023df0..09b164a5700 100644 --- a/Documentation/vm/.gitignore +++ b/Documentation/vm/.gitignore @@ -1 +1,2 @@ +page-types slabinfo diff --git a/Documentation/vm/locking b/Documentation/vm/locking index f366fa95617..25fadb44876 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the mm start up ... this is a loose form of stability on mm_users. For example, it is used in copy_mm to protect against a racing tlb_gather_mmu single address space optimization, so that the zap_page_range (from -vmtruncate) does not lose sending ipi's to cloned threads that might +truncate) does not lose sending ipi's to cloned threads that might be spawned underneath it and go to user mode to drag in pte's into tlbs. swap_lock diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 3eda8ea0085..fa1a30d9e9d 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -5,6 +5,7 @@ * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> */ +#define _LARGEFILE64_SOURCE #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -13,12 +14,33 @@ #include <string.h> #include <getopt.h> #include <limits.h> +#include <assert.h> #include <sys/types.h> #include <sys/errno.h> #include <sys/fcntl.h> /* + * pagemap kernel ABI bits + */ + +#define PM_ENTRY_BYTES sizeof(uint64_t) +#define PM_STATUS_BITS 3 +#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) +#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) +#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) +#define PM_PSHIFT_BITS 6 +#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) +#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) +#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) +#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) + +#define PM_PRESENT PM_STATUS(4LL) +#define PM_SWAP PM_STATUS(2LL) + + +/* * kernel page flags */ @@ -126,6 +148,14 @@ static int nr_addr_ranges; static unsigned long opt_offset[MAX_ADDR_RANGES]; static unsigned long opt_size[MAX_ADDR_RANGES]; +#define MAX_VMAS 10240 +static int nr_vmas; +static unsigned long pg_start[MAX_VMAS]; +static unsigned long pg_end[MAX_VMAS]; +static unsigned long voffset; + +static int pagemap_fd; + #define MAX_BIT_FILTERS 64 static int nr_bit_filters; static uint64_t opt_mask[MAX_BIT_FILTERS]; @@ -135,7 +165,6 @@ static int page_size; #define PAGES_BATCH (64 << 10) /* 64k pages */ static int kpageflags_fd; -static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; #define HASH_SHIFT 13 #define HASH_SIZE (1 << HASH_SHIFT) @@ -158,6 +187,11 @@ static uint64_t page_flags[HASH_SIZE]; type __min2 = (y); \ __min1 < __min2 ? __min1 : __min2; }) +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1 : __max2; }) + static unsigned long pages2mb(unsigned long pages) { return (pages * page_size) >> 20; @@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags) static void show_page_range(unsigned long offset, uint64_t flags) { static uint64_t flags0; + static unsigned long voff; static unsigned long index; static unsigned long count; - if (flags == flags0 && offset == index + count) { + if (flags == flags0 && offset == index + count && + (!opt_pid || voffset == voff + count)) { count++; return; } - if (count) - printf("%lu\t%lu\t%s\n", + if (count) { + if (opt_pid) + printf("%lx\t", voff); + printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); + } flags0 = flags; index = offset; + voff = voffset; count = 1; } static void show_page(unsigned long offset, uint64_t flags) { - printf("%lu\t%s\n", offset, page_flag_name(flags)); + if (opt_pid) + printf("%lx\t", voffset); + printf("%lx\t%s\n", offset, page_flag_name(flags)); } static void show_summary(void) @@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count) lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); while (count) { + uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; + batch = min_t(unsigned long, count, PAGES_BATCH); n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); if (n == 0) @@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count) } } + +#define PAGEMAP_BATCH 4096 +static unsigned long task_pfn(unsigned long pgoff) +{ + static uint64_t buf[PAGEMAP_BATCH]; + static unsigned long start; + static long count; + uint64_t pfn; + + if (pgoff < start || pgoff >= start + count) { + if (lseek64(pagemap_fd, + (uint64_t)pgoff * PM_ENTRY_BYTES, + SEEK_SET) < 0) { + perror("pagemap seek"); + exit(EXIT_FAILURE); + } + count = read(pagemap_fd, buf, sizeof(buf)); + if (count == 0) + return 0; + if (count < 0) { + perror("pagemap read"); + exit(EXIT_FAILURE); + } + if (count % PM_ENTRY_BYTES) { + fatal("pagemap read not aligned.\n"); + exit(EXIT_FAILURE); + } + count /= PM_ENTRY_BYTES; + start = pgoff; + } + + pfn = buf[pgoff - start]; + if (pfn & PM_PRESENT) + pfn = PM_PFRAME(pfn); + else + pfn = 0; + + return pfn; +} + +static void walk_task(unsigned long index, unsigned long count) +{ + int i = 0; + const unsigned long end = index + count; + + while (index < end) { + + while (pg_end[i] <= index) + if (++i >= nr_vmas) + return; + if (pg_start[i] >= end) + return; + + voffset = max_t(unsigned long, pg_start[i], index); + index = min_t(unsigned long, pg_end[i], end); + + assert(voffset < index); + for (; voffset < index; voffset++) { + unsigned long pfn = task_pfn(voffset); + if (pfn) + walk_pfn(pfn, 1); + } + } +} + +static void add_addr_range(unsigned long offset, unsigned long size) +{ + if (nr_addr_ranges >= MAX_ADDR_RANGES) + fatal("too many addr ranges\n"); + + opt_offset[nr_addr_ranges] = offset; + opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); + nr_addr_ranges++; +} + static void walk_addr_ranges(void) { int i; @@ -415,10 +534,13 @@ static void walk_addr_ranges(void) } if (!nr_addr_ranges) - walk_pfn(0, ULONG_MAX); + add_addr_range(0, ULONG_MAX); for (i = 0; i < nr_addr_ranges; i++) - walk_pfn(opt_offset[i], opt_size[i]); + if (!opt_pid) + walk_pfn(opt_offset[i], opt_size[i]); + else + walk_task(opt_offset[i], opt_size[i]); close(kpageflags_fd); } @@ -446,8 +568,8 @@ static void usage(void) " -r|--raw Raw mode, for kernel developers\n" " -a|--addr addr-spec Walk a range of pages\n" " -b|--bits bits-spec Walk pages with specified bits\n" -#if 0 /* planned features */ " -p|--pid pid Walk process address space\n" +#if 0 /* planned features */ " -f|--file filename Walk file address space\n" #endif " -l|--list Show page details in ranges\n" @@ -459,7 +581,7 @@ static void usage(void) " N+M pages range from N to N+M-1\n" " N,M pages range from N to M-1\n" " N, pages range from N to end\n" -" ,M pages range from 0 to M\n" +" ,M pages range from 0 to M-1\n" "bits-spec:\n" " bit1,bit2 (flags & (bit1|bit2)) != 0\n" " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" @@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str) static void parse_pid(const char *str) { + FILE *file; + char buf[5000]; + opt_pid = parse_number(str); -} -static void parse_file(const char *name) -{ + sprintf(buf, "/proc/%d/pagemap", opt_pid); + pagemap_fd = open(buf, O_RDONLY); + if (pagemap_fd < 0) { + perror(buf); + exit(EXIT_FAILURE); + } + + sprintf(buf, "/proc/%d/maps", opt_pid); + file = fopen(buf, "r"); + if (!file) { + perror(buf); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + unsigned long vm_start; + unsigned long vm_end; + unsigned long long pgoff; + int major, minor; + char r, w, x, s; + unsigned long ino; + int n; + + n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", + &vm_start, + &vm_end, + &r, &w, &x, &s, + &pgoff, + &major, &minor, + &ino); + if (n < 10) { + fprintf(stderr, "unexpected line: %s\n", buf); + continue; + } + pg_start[nr_vmas] = vm_start / page_size; + pg_end[nr_vmas] = vm_end / page_size; + if (++nr_vmas >= MAX_VMAS) { + fprintf(stderr, "too many VMAs\n"); + break; + } + } + fclose(file); } -static void add_addr_range(unsigned long offset, unsigned long size) +static void parse_file(const char *name) { - if (nr_addr_ranges >= MAX_ADDR_RANGES) - fatal("too much addr ranges\n"); - - opt_offset[nr_addr_ranges] = offset; - opt_size[nr_addr_ranges] = size; - nr_addr_ranges++; } static void parse_addr_range(const char *optarg) @@ -676,8 +834,10 @@ int main(int argc, char *argv[]) } } + if (opt_list && opt_pid) + printf("voffset\t"); if (opt_list == 1) - printf("offset\tcount\tflags\n"); + printf("offset\tlen\tflags\n"); if (opt_list == 2) printf("offset\tflags\n"); diff --git a/MAINTAINERS b/MAINTAINERS index 7c1c0b05b29..e797c4d48cf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -257,12 +257,6 @@ W: http://www.lesswatts.org/projects/acpi/ S: Supported F: drivers/acpi/fan.c -ACPI PCI HOTPLUG DRIVER -M: Kristen Carlson Accardi <kristen.c.accardi@intel.com> -L: linux-pci@vger.kernel.org -S: Supported -F: drivers/pci/hotplug/acpi* - ACPI THERMAL DRIVER M: Zhang Rui <rui.zhang@intel.com> L: linux-acpi@vger.kernel.org @@ -2331,7 +2325,9 @@ S: Orphan F: drivers/hwmon/ HARDWARE RANDOM NUMBER GENERATOR CORE -S: Orphan +M: Matt Mackall <mpm@selenic.com> +M: Herbert Xu <herbert@gondor.apana.org.au> +S: Odd fixes F: Documentation/hw_random.txt F: drivers/char/hw_random/ F: include/linux/hw_random.h @@ -4003,11 +3999,11 @@ F: Documentation/PCI/ F: drivers/pci/ F: include/linux/pci* -PCIE HOTPLUG DRIVER -M: Kristen Carlson Accardi <kristen.c.accardi@intel.com> +PCI HOTPLUG +M: Jesse Barnes <jbarnes@virtuousgeek.org> L: linux-pci@vger.kernel.org S: Supported -F: drivers/pci/pcie/ +F: drivers/pci/hotplug PCMCIA SUBSYSTEM P: Linux PCMCIA Team @@ -4670,12 +4666,6 @@ F: drivers/serial/serial_lh7a40x.c F: drivers/usb/gadget/lh7a40* F: drivers/usb/host/ohci-lh7a40* -SHPC HOTPLUG DRIVER -M: Kristen Carlson Accardi <kristen.c.accardi@intel.com> -L: linux-pci@vger.kernel.org -S: Supported -F: drivers/pci/hotplug/shpchp* - SIMPLE FIRMWARE INTERFACE (SFI) P: Len Brown M: lenb@kernel.org @@ -4687,7 +4677,6 @@ F: arch/x86/kernel/*sfi* F: drivers/sfi/ F: include/linux/sfi*.h - SIMTEC EB110ATX (Chalice CATS) P: Ben Dooks M: Vincent Sanders <support@simtec.co.uk> @@ -179,9 +179,46 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ # Alternatively CROSS_COMPILE can be set in the environment. # Default value for CROSS_COMPILE is not to prefix executables # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile +# +# To force ARCH and CROSS_COMPILE settings include kernel.* files +# in the kernel tree - do not patch this file. export KBUILD_BUILDHOST := $(SUBARCH) -ARCH ?= $(SUBARCH) -CROSS_COMPILE ?= + +# Kbuild save the ARCH and CROSS_COMPILE setting in kernel.* files. +# Restore these settings and check that user did not specify +# conflicting values. + +saved_arch := $(shell cat include/generated/kernel.arch 2> /dev/null) +saved_cross := $(shell cat include/generated/kernel.cross 2> /dev/null) + +ifneq ($(CROSS_COMPILE),) + ifneq ($(saved_cross),) + ifneq ($(CROSS_COMPILE),$(saved_cross)) + $(error CROSS_COMPILE changed from \ + "$(saved_cross)" to \ + to "$(CROSS_COMPILE)". \ + Use "make mrproper" to fix it up) + endif + endif +else + CROSS_COMPILE := $(saved_cross) +endif + +ifneq ($(ARCH),) + ifneq ($(saved_arch),) + ifneq ($(saved_arch),$(ARCH)) + $(error ARCH changed from \ + "$(saved_arch)" to "$(ARCH)". \ + Use "make mrproper" to fix it up) + endif + endif +else + ifneq ($(saved_arch),) + ARCH := $(saved_arch) + else + ARCH := $(SUBARCH) + endif +endif # Architecture as present in compile.h UTS_MACHINE := $(ARCH) @@ -315,6 +352,7 @@ OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump AWK = awk GENKSYMS = scripts/genksyms/genksyms +INSTALLKERNEL := installkernel DEPMOD = /sbin/depmod KALLSYMS = scripts/kallsyms PERL = perl @@ -353,7 +391,8 @@ KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE +export CPP AR NM STRIP OBJCOPY OBJDUMP +export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS @@ -444,6 +483,11 @@ ifeq ($(config-targets),1) include $(srctree)/arch/$(SRCARCH)/Makefile export KBUILD_DEFCONFIG KBUILD_KCONFIG +# save ARCH & CROSS_COMPILE settings +$(shell mkdir -p include/generated && \ + echo $(ARCH) > include/generated/kernel.arch && \ + echo $(CROSS_COMPILE) > include/generated/kernel.cross) + config: scripts_basic outputmakefile FORCE $(Q)mkdir -p include/linux include/config $(Q)$(MAKE) $(build)=scripts/kconfig $@ @@ -571,6 +615,9 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) # revert to pre-gcc-4.4 behaviour of .eh_frame KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) +# conserve stack if available +KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) + # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments # But warn user when we do so warn-assign = \ @@ -591,12 +638,12 @@ endif # Use --build-id when available. LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\ - $(call ld-option, -Wl$(comma)--build-id,)) + $(call cc-ldoption, -Wl$(comma)--build-id,)) LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID) LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID) ifeq ($(CONFIG_STRIP_ASM_SYMS),y) -LDFLAGS_vmlinux += -X +LDFLAGS_vmlinux += $(call ld-option, -X,) endif # Default kernel image to build when no specific target is given. @@ -980,11 +1027,6 @@ prepare0: archprepare FORCE # All the preparing.. prepare: prepare0 -# Leave this as default for preprocessing vmlinux.lds.S, which is now -# done in arch/$(ARCH)/kernel/Makefile - -export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) - # The asm symlink changes when $(ARCH) changes. # Detect this and ask user to run make mrproper # If asm is a stale symlink (point to dir that does not exist) remove it diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h index 25da0017ec8..e42823e954a 100644 --- a/arch/alpha/include/asm/fcntl.h +++ b/arch/alpha/include/asm/fcntl.h @@ -26,6 +26,8 @@ #define F_GETOWN 6 /* for sockets. */ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. */ +#define F_SETOWN_EX 12 +#define F_GETOWN_EX 13 /* for posix fcntl() and lockf() */ #define F_RDLCK 1 diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h index 547e90951ce..3f390e8cc0b 100644 --- a/arch/alpha/include/asm/smp.h +++ b/arch/alpha/include/asm/smp.h @@ -47,7 +47,7 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS]; extern int smp_num_cpus; extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #else /* CONFIG_SMP */ diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h index b4f284c72ff..36b3a30ba0e 100644 --- a/arch/alpha/include/asm/topology.h +++ b/arch/alpha/include/asm/topology.h @@ -22,23 +22,6 @@ static inline int cpu_to_node(int cpu) return node; } -static inline cpumask_t node_to_cpumask(int node) -{ - cpumask_t node_cpu_mask = CPU_MASK_NONE; - int cpu; - - for_each_online_cpu(cpu) { - if (cpu_to_node(cpu) == node) - cpu_set(cpu, node_cpu_mask); - } - -#ifdef DEBUG_NUMA - printk("node %d: cpu_mask: %016lx\n", node, node_cpu_mask); -#endif - - return node_cpu_mask; -} - extern struct cpumask node_to_cpumask_map[]; /* FIXME: This is dumb, recalculating every time. But simple. */ static const struct cpumask *cpumask_of_node(int node) @@ -55,7 +38,6 @@ static const struct cpumask *cpumask_of_node(int node) return &node_to_cpumask_map[node]; } -#define pcibus_to_cpumask(bus) (cpu_online_map) #define cpumask_of_pcibus(bus) (cpu_online_mask) #endif /* !CONFIG_NUMA */ diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index e302daecbe5..8e059e58b0a 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -1016,7 +1016,7 @@ marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *m { struct marvel_agp_aperture *aper = agp->aperture.sysdata; return iommu_bind(aper->arena, aper->pg_start + pg_start, - mem->page_count, mem->memory); + mem->page_count, mem->pages); } static int diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 319fcb74611..76686497b1e 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -680,7 +680,7 @@ titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *me { struct titan_agp_aperture *aper = agp->aperture.sysdata; return iommu_bind(aper->arena, aper->pg_start + pg_start, - mem->page_count, mem->memory); + mem->page_count, mem->pages); } static int diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 00edd04b585..85457b2d451 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h @@ -198,7 +198,7 @@ extern unsigned long size_for_memory(unsigned long max); extern int iommu_reserve(struct pci_iommu_arena *, long, long); extern int iommu_release(struct pci_iommu_arena *, long, long); -extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *); +extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **); extern int iommu_unbind(struct pci_iommu_arena *, long, long); diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index d15aedfe606..8449504f5e0 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -876,7 +876,7 @@ iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count) int iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, - unsigned long *physaddrs) + struct page **pages) { unsigned long flags; unsigned long *ptes; @@ -896,7 +896,7 @@ iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, } for(i = 0, j = pg_start; i < pg_count; i++, j++) - ptes[j] = mk_iommu_pte(physaddrs[i]); + ptes[j] = mk_iommu_pte(page_to_phys(pages[i])); spin_unlock_irqrestore(&arena->lock, flags); diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 3a2fb7a02db..289039bb6bb 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -19,7 +19,6 @@ #include <linux/ptrace.h> #include <linux/slab.h> #include <linux/user.h> -#include <linux/utsname.h> #include <linux/time.h> #include <linux/major.h> #include <linux/stat.h> diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index b1fe5674c3a..42aa078a5e4 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -548,16 +548,16 @@ setup_profiling_timer(unsigned int multiplier) static void -send_ipi_message(cpumask_t to_whom, enum ipi_message_type operation) +send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation) { int i; mb(); - for_each_cpu_mask(i, to_whom) + for_each_cpu(i, to_whom) set_bit(operation, &ipi_data[i].bits); mb(); - for_each_cpu_mask(i, to_whom) + for_each_cpu(i, to_whom) wripir(i); } @@ -624,7 +624,7 @@ smp_send_reschedule(int cpu) printk(KERN_WARNING "smp_send_reschedule: Sending IPI to self.\n"); #endif - send_ipi_message(cpumask_of_cpu(cpu), IPI_RESCHEDULE); + send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); } void @@ -636,17 +636,17 @@ smp_send_stop(void) if (hard_smp_processor_id() != boot_cpu_id) printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n"); #endif - send_ipi_message(to_whom, IPI_CPU_STOP); + send_ipi_message(&to_whom, IPI_CPU_STOP); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { send_ipi_message(mask, IPI_CALL_FUNC); } void arch_send_call_function_single_ipi(int cpu) { - send_ipi_message(cpumask_of_cpu(cpu), IPI_CALL_FUNC_SINGLE); + send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } static void diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 54661125a8b..a73caaf6676 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -14,7 +14,7 @@ LDFLAGS_vmlinux :=-p --no-undefined -X ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 endif -CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) + OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe @@ -279,7 +279,7 @@ define archhelp echo ' (supply initrd image via make variable INITRD=<path>)' echo ' install - Install uncompressed kernel' echo ' zinstall - Install compressed kernel' - echo ' Install using (your) ~/bin/installkernel or' - echo ' (distribution) /sbin/installkernel or' + echo ' Install using (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' endef diff --git a/arch/arm/boot/install.sh b/arch/arm/boot/install.sh index 9f9bed20734..06ea7d42ce8 100644 --- a/arch/arm/boot/install.sh +++ b/arch/arm/boot/install.sh @@ -21,8 +21,8 @@ # # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi if [ "$(basename $2)" = "zImage" ]; then # Compressed install diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 1a711ea8418..fd03fb63a33 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -334,14 +334,14 @@ static inline void outer_flush_range(unsigned long start, unsigned long end) #ifndef CONFIG_CPU_CACHE_VIPT static inline void flush_cache_mm(struct mm_struct *mm) { - if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) __cpuc_flush_user_all(); } static inline void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), vma->vm_flags); } @@ -349,7 +349,7 @@ flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { unsigned long addr = user_addr & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } @@ -360,7 +360,7 @@ flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len, int write) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { unsigned long addr = (unsigned long)kaddr; __cpuc_coherent_kern_range(addr, addr + len); } diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2..1a8c7279a28 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -187,11 +187,74 @@ union iop3xx_desc { void *ptr; }; +/* No support for p+q operations */ +static inline int +iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op) +{ + BUG(); + return 0; +} + +static inline void +iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + BUG(); +} + +static inline void +iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) +{ + BUG(); +} + +static inline void +iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, + dma_addr_t addr, unsigned char coef) +{ + BUG(); +} + +static inline int +iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op) +{ + BUG(); + return 0; +} + +static inline void +iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + BUG(); +} + +static inline void +iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) +{ + BUG(); +} + +#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr + +static inline void +iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, + dma_addr_t *src) +{ + BUG(); +} + static inline int iop_adma_get_max_xor(void) { return 32; } +static inline int iop_adma_get_max_pq(void) +{ + BUG(); + return 0; +} + static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) { int id = chan->device->id; @@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, return slot_cnt; } +static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) +{ + return 0; +} + static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, return 0; } + +static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + BUG(); + return 0; +} + static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->src[0] = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop3xx_desc_aau *hw_desc = desc->hw_desc; struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err; + return desc_ctrl.zero_result_err << SUM_CHECK_P; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h index 385c6e8cbbd..59b8c3892f7 100644 --- a/arch/arm/include/asm/hardware/iop_adma.h +++ b/arch/arm/include/asm/hardware/iop_adma.h @@ -86,6 +86,7 @@ struct iop_adma_chan { * @idx: pool index * @unmap_src_cnt: number of xor sources * @unmap_len: transaction bytecount + * @tx_list: list of descriptors that are associated with one operation * @async_tx: support for the async_tx api * @group_list: list of slots that make up a multi-descriptor transaction * for example transfer lengths larger than the supported hw max @@ -102,10 +103,12 @@ struct iop_adma_desc_slot { u16 idx; u16 unmap_src_cnt; size_t unmap_len; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; union { u32 *xor_check_result; u32 *crc32_result; + u32 *pq_check_result; }; }; diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index bcdb9291ef0..de6cefb329d 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -103,14 +103,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #ifdef CONFIG_SMP /* check for possible thread migration */ - if (!cpus_empty(next->cpu_vm_mask) && !cpu_isset(cpu, next->cpu_vm_mask)) + if (!cpumask_empty(mm_cpumask(next)) && + !cpumask_test_cpu(cpu, mm_cpumask(next))) __flush_icache_all(); #endif - if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) { + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) { check_context(next); cpu_switch_mm(next->pgd, next); if (cache_is_vivt()) - cpu_clear(cpu, prev->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); } #endif } diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index a06e735b262..e0d763be184 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -93,7 +93,6 @@ extern void platform_cpu_enable(unsigned int cpu); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask /* * show local interrupt info diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index c964f3fc3bc..a45ab5dd825 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -350,7 +350,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) if (tlb_flag(TLB_WB)) dsb(); - if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { if (tlb_flag(TLB_V3_FULL)) asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc"); if (tlb_flag(TLB_V4_U_FULL)) @@ -388,7 +388,7 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) if (tlb_flag(TLB_WB)) dsb(); - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { if (tlb_flag(TLB_V3_PAGE)) asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc"); if (tlb_flag(TLB_V4_U_PAGE)) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 3213c9382b1..c446aeff7b8 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -2,7 +2,8 @@ # Makefile for the linux kernel. # -AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) +AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) ifdef CONFIG_DYNAMIC_FTRACE CFLAGS_REMOVE_ftrace.o = -pg diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index 3f470866bb8..e7cbb50dc35 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -24,9 +24,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * * The things we do for performance.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index de885fd256c..e0d32770bb3 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -189,7 +189,7 @@ int __cpuexit __cpu_disable(void) read_lock(&tasklist_lock); for_each_process(p) { if (p->mm) - cpu_clear(cpu, p->mm->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); } read_unlock(&tasklist_lock); @@ -257,7 +257,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) atomic_inc(&mm->mm_users); atomic_inc(&mm->mm_count); current->active_mm = mm; - cpu_set(cpu, mm->cpu_vm_mask); + cpumask_set_cpu(cpu, mm_cpumask(mm)); cpu_switch_mm(mm->pgd, mm); enter_lazy_tlb(mm, current); local_flush_tlb_all(); @@ -643,7 +643,7 @@ void flush_tlb_all(void) void flush_tlb_mm(struct mm_struct *mm) { if (tlb_ops_need_broadcast()) - on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, &mm->cpu_vm_mask); + on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm)); else local_flush_tlb_mm(mm); } @@ -654,7 +654,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) struct tlb_args ta; ta.ta_vma = vma; ta.ta_start = uaddr; - on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, &vma->vm_mm->cpu_vm_mask); + on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm)); } else local_flush_tlb_page(vma, uaddr); } @@ -677,7 +677,7 @@ void flush_tlb_range(struct vm_area_struct *vma, ta.ta_vma = vma; ta.ta_start = start; ta.ta_end = end; - on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, &vma->vm_mm->cpu_vm_mask); + on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm)); } else local_flush_tlb_range(vma, start, end); } diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index b3ec641b5cf..78ecaac6520 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -25,7 +25,6 @@ #include <linux/mman.h> #include <linux/fs.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/ipc.h> #include <linux/uaccess.h> diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c index 55719a97427..fb5c23af101 100644 --- a/arch/arm/mach-at91/at91sam9263_devices.c +++ b/arch/arm/mach-at91/at91sam9263_devices.c @@ -757,6 +757,42 @@ void __init at91_add_device_ac97(struct ac97c_platform_data *data) void __init at91_add_device_ac97(struct ac97c_platform_data *data) {} #endif +/* -------------------------------------------------------------------- + * CAN Controller + * -------------------------------------------------------------------- */ + +#if defined(CONFIG_CAN_AT91) || defined(CONFIG_CAN_AT91_MODULE) +static struct resource can_resources[] = { + [0] = { + .start = AT91SAM9263_BASE_CAN, + .end = AT91SAM9263_BASE_CAN + SZ_16K - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = AT91SAM9263_ID_CAN, + .end = AT91SAM9263_ID_CAN, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device at91sam9263_can_device = { + .name = "at91_can", + .id = -1, + .resource = can_resources, + .num_resources = ARRAY_SIZE(can_resources), +}; + +void __init at91_add_device_can(struct at91_can_data *data) +{ + at91_set_A_periph(AT91_PIN_PA13, 0); /* CANTX */ + at91_set_A_periph(AT91_PIN_PA14, 0); /* CANRX */ + at91sam9263_can_device.dev.platform_data = data; + + platform_device_register(&at91sam9263_can_device); +} +#else +void __init at91_add_device_can(struct at91_can_data *data) {} +#endif /* -------------------------------------------------------------------- * LCD Controller diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c index 26f1aa6049a..2d867fb0630 100644 --- a/arch/arm/mach-at91/board-sam9263ek.c +++ b/arch/arm/mach-at91/board-sam9263ek.c @@ -400,6 +400,23 @@ static struct gpio_led ek_pwm_led[] = { } }; +/* + * CAN + */ +static void sam9263ek_transceiver_switch(int on) +{ + if (on) { + at91_set_gpio_output(AT91_PIN_PA18, 1); /* CANRXEN */ + at91_set_gpio_output(AT91_PIN_PA19, 0); /* CANRS */ + } else { + at91_set_gpio_output(AT91_PIN_PA18, 0); /* CANRXEN */ + at91_set_gpio_output(AT91_PIN_PA19, 1); /* CANRS */ + } +} + +static struct at91_can_data ek_can_data = { + .transceiver_switch = sam9263ek_transceiver_switch, +}; static void __init ek_board_init(void) { @@ -431,6 +448,8 @@ static void __init ek_board_init(void) /* LEDs */ at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds)); at91_pwm_leds(ek_pwm_led, ARRAY_SIZE(ek_pwm_led)); + /* CAN */ + at91_add_device_can(&ek_can_data); } MACHINE_START(AT91SAM9263EK, "Atmel AT91SAM9263-EK") diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h index 583f38a38df..2f4fcedc02b 100644 --- a/arch/arm/mach-at91/include/mach/board.h +++ b/arch/arm/mach-at91/include/mach/board.h @@ -188,6 +188,12 @@ extern void __init at91_add_device_isi(void); /* Touchscreen Controller */ extern void __init at91_add_device_tsadcc(void); +/* CAN */ +struct at91_can_data { + void (*transceiver_switch)(int on); +}; +extern void __init at91_add_device_can(struct at91_can_data *data); + /* LEDs */ extern void __init at91_init_leds(u8 cpu_led, u8 timer_led); extern void __init at91_gpio_leds(struct gpio_led *leds, int nr); diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f217..6d3782d85a9 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void) return 16; } +#define iop_adma_get_max_pq iop_adma_get_max_xor + static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) { return __raw_readl(ADMA_ADAR(chan)); @@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT +#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) +#define iop_chan_pq_slot_count iop_chan_xor_slot_count +#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) @@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, return hw_desc->dest_addr; } +static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *chan) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + return hw_desc->q_dest_addr; +} + static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, return 1; } +static inline void +iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.src_select = src_cnt - 1; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ + u_desc_ctrl.field.pq_xfer_en = 1; + u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P); + u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; + hw_desc->desc_ctrl = u_desc_ctrl.value; +} + +static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = hw_desc->desc_ctrl; + return u_desc_ctrl.field.pq_xfer_en; +} + +static inline void +iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, + unsigned long flags) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + union { + u32 value; + struct iop13xx_adma_desc_ctrl field; + } u_desc_ctrl; + + u_desc_ctrl.value = 0; + u_desc_ctrl.field.src_select = src_cnt - 1; + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ + u_desc_ctrl.field.zero_result = 1; + u_desc_ctrl.field.status_write_back_en = 1; + u_desc_ctrl.field.pq_xfer_en = 1; + u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P); + u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; + hw_desc->desc_ctrl = u_desc_ctrl.value; +} + static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan, u32 byte_count) @@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) } } +#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan, @@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, hw_desc->upper_dest_addr = 0; } +static inline void +iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) +{ + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; + + hw_desc->dest_addr = addr[0]; + hw_desc->q_dest_addr = addr[1]; + hw_desc->upper_dest_addr = 0; +} + static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, dma_addr_t addr) { @@ -389,6 +464,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, } static inline void +iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, + dma_addr_t addr, unsigned char coef) +{ + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; + struct iop13xx_adma_src *src; + int i = 0; + + do { + iter = iop_hw_desc_slot_idx(hw_desc, i); + src = &iter->src[src_idx]; + src->src_addr = addr; + src->pq_upper_src_addr = 0; + src->pq_dmlt = coef; + slot_cnt -= slots_per_op; + if (slot_cnt) { + i += slots_per_op; + addr += IOP_ADMA_PQ_MAX_BYTE_COUNT; + } + } while (slot_cnt); +} + +static inline void iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, } #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr +#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr + +static inline void +iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, + dma_addr_t *src) +{ + iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]); + iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]); +} static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, u32 next_desc_addr) @@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->block_fill_data = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + enum sum_check_flags flags; BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); - if (desc_ctrl.pq_xfer_en) - return byte_count.zero_result_err_q; - else - return byte_count.zero_result_err; + flags = byte_count.zero_result_err_q << SUM_CHECK_Q; + flags |= byte_count.zero_result_err << SUM_CHECK_P; + + return flags; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index bee42c609df..5c147fb66a0 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void) plat_data = &iop13xx_adma_0_data; dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); - dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); - dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); dma_cap_set(DMA_MEMSET, plat_data->cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_1: @@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void) plat_data = &iop13xx_adma_1_data; dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); - dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); - dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); dma_cap_set(DMA_MEMSET, plat_data->cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); break; case IOP13XX_INIT_ADMA_2: @@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void) plat_data = &iop13xx_adma_2_data; dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); dma_cap_set(DMA_XOR, plat_data->cap_mask); - dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); - dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); dma_cap_set(DMA_MEMSET, plat_data->cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); - dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); - dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); - dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); + dma_cap_set(DMA_PQ, plat_data->cap_mask); + dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); break; } } diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index fc84fcc7438..6bda76a4319 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -59,6 +59,6 @@ void __new_context(struct mm_struct *mm) } spin_unlock(&cpu_asid_lock); - mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id()); + cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); mm->context.id = asid; } diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 575f3ad722e..b27942909b2 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -50,7 +50,7 @@ static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) void flush_cache_mm(struct mm_struct *mm) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) __cpuc_flush_user_all(); return; } @@ -73,7 +73,7 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), vma->vm_flags); return; @@ -97,7 +97,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { unsigned long addr = user_addr & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } @@ -113,7 +113,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long len, int write) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { unsigned long addr = (unsigned long)kaddr; __cpuc_coherent_kern_range(addr, addr + len); } @@ -126,7 +126,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, } /* VIPT non-aliasing cache */ - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask) && + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)) && vma->vm_flags & VM_EXEC) { unsigned long addr = (unsigned long)kaddr; /* only flushing the kernel mapping on non-aliasing VIPT */ diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index 3c127aabe21..1ff6a37e893 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c @@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void) dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); #else dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); #endif @@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void) dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); #else dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); - dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); #endif @@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void) dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #else dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); - dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); + dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); #endif diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c index 57ec9f2dcd9..6b2343e6fe3 100644 --- a/arch/avr32/kernel/init_task.c +++ b/arch/avr32/kernel/init_task.c @@ -18,9 +18,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); /* * Initial thread structure. Must be aligned on an 8192-byte boundary. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c index 376f18c4a6c..94925641e53 100644 --- a/arch/avr32/mm/init.c +++ b/arch/avr32/mm/init.c @@ -24,11 +24,9 @@ #include <asm/setup.h> #include <asm/sections.h> -#define __page_aligned __attribute__((section(".data.page_aligned"))) - DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned; +pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data; struct page *empty_zero_page; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/blackfin/Makefile b/arch/blackfin/Makefile index 6f9533c3d75..f063b772934 100644 --- a/arch/blackfin/Makefile +++ b/arch/blackfin/Makefile @@ -155,7 +155,7 @@ define archhelp echo '* vmImage.gz - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.gz)' echo ' vmImage.lzma - Kernel-only image for U-Boot (arch/$(ARCH)/boot/vmImage.lzma)' echo ' install - Install kernel using' - echo ' (your) ~/bin/$(CROSS_COMPILE)installkernel or' - echo ' (distribution) PATH: $(CROSS_COMPILE)installkernel or' + echo ' (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) PATH: $(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH)' endef diff --git a/arch/blackfin/boot/install.sh b/arch/blackfin/boot/install.sh index 9560a6b2910..e2c6e40902b 100644 --- a/arch/blackfin/boot/install.sh +++ b/arch/blackfin/boot/install.sh @@ -36,9 +36,9 @@ verify "$3" # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if which ${CROSS_COMPILE}installkernel >/dev/null 2>&1; then - exec ${CROSS_COMPILE}installkernel "$@" +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if which ${INSTALLKERNEL} >/dev/null 2>&1; then + exec ${INSTALLKERNEL} "$@" fi # Default install - same as make zlilo diff --git a/arch/cris/Makefile b/arch/cris/Makefile index 71e17d3eedd..29c2ceb38a7 100644 --- a/arch/cris/Makefile +++ b/arch/cris/Makefile @@ -42,8 +42,6 @@ LD = $(CROSS_COMPILE)ld -mcrislinux OBJCOPYFLAGS := -O binary -R .note -R .comment -S -CPPFLAGS_vmlinux.lds = -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE) - KBUILD_AFLAGS += -mlinux -march=$(arch-y) $(inc) KBUILD_CFLAGS += -mlinux -march=$(arch-y) -pipe $(inc) KBUILD_CPPFLAGS += $(inc) diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile index ee7bcd4d20b..b45640b3e60 100644 --- a/arch/cris/kernel/Makefile +++ b/arch/cris/kernel/Makefile @@ -3,6 +3,7 @@ # Makefile for the linux kernel. # +CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE) extra-y := vmlinux.lds obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 51dcd04d277..c99aeab7cef 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -45,9 +45,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c index 1d3df1d9495..3c3e0b336a9 100644 --- a/arch/frv/kernel/init_task.c +++ b/arch/frv/kernel/init_task.c @@ -19,9 +19,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index be722fc1acf..0d4d3e3a4cf 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c @@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len) /* * Send us to sleep. */ -static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp, +static int sysctl_pm_do_suspend(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int retval, mode; @@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode) } -static int cmode_procctl(ctl_table *ctl, int write, struct file *filp, +static int cmode_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cmode; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_cmode = user_atoi(buffer, *lenp); @@ -301,13 +301,13 @@ static int try_set_cm(int new_cm) return 0; } -static int p0_procctl(ctl_table *ctl, int write, struct file *filp, +static int p0_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_p0; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_p0 = user_atoi(buffer, *lenp); @@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table, return 1; } -static int cm_procctl(ctl_table *ctl, int write, struct file *filp, +static int cm_procctl(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos) { int new_cm; if (!write) - return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); + return proc_dointvec(ctl, write, buffer, lenp, fpos); new_cm = user_atoi(buffer, *lenp); diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c index baadc97f862..2b6b5289cdc 100644 --- a/arch/frv/kernel/sys_frv.c +++ b/arch/frv/kernel/sys_frv.c @@ -21,7 +21,6 @@ #include <linux/stat.h> #include <linux/mman.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/syscalls.h> #include <linux/ipc.h> diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c index 089c65ed6eb..54c1062ee80 100644 --- a/arch/h8300/kernel/init_task.c +++ b/arch/h8300/kernel/init_task.c @@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index 2745656dcc5..8cb5d73a0e3 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -17,7 +17,6 @@ #include <linux/syscalls.h> #include <linux/mman.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/fs.h> #include <linux/ipc.h> diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index d217d1d4e05..0b3b3997dec 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -127,7 +127,6 @@ extern int is_multithreading_enabled(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask #else /* CONFIG_SMP */ diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index d0141fbf51d..3ddb4e709db 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -33,7 +33,6 @@ /* * Returns a bitmask of CPUs on Node 'node'. */ -#define node_to_cpumask(node) (node_to_cpu_mask[node]) #define cpumask_of_node(node) (&node_to_cpu_mask[node]) /* @@ -104,8 +103,6 @@ void build_cpu_to_node_map(void); #ifdef CONFIG_SMP #define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) #define topology_core_id(cpu) (cpu_data(cpu)->core_id) -#define topology_core_siblings(cpu) (cpu_core_map[cpu]) -#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #define smt_capable() (smp_num_siblings > 1) diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh index 929e780026d..0e932f5dcd1 100644 --- a/arch/ia64/install.sh +++ b/arch/ia64/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install - same as make zlilo diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate index 1d87f84069b..ab9b03a9adc 100644 --- a/arch/ia64/kernel/Makefile.gate +++ b/arch/ia64/kernel/Makefile.gate @@ -10,7 +10,7 @@ quiet_cmd_gate = GATE $@ cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) $(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE $(call if_changed,gate) diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index c475fc281be..e253ab8fcbc 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -33,7 +33,8 @@ union { struct thread_info thread_info; } s; unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; -} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{ +} init_task_mem asm ("init_task") __init_task_data = + {{ .task = INIT_TASK(init_task_mem.s.task), .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) }}; diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 93ebfea43c6..dabeefe2113 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -302,7 +302,7 @@ smp_flush_tlb_mm (struct mm_struct *mm) return; } - smp_call_function_mask(mm->cpu_vm_mask, + smp_call_function_many(mm_cpumask(mm), (void (*)(void *))local_finish_flush_tlb_mm, mm, 1); local_irq_disable(); local_finish_flush_tlb_mm(mm); diff --git a/arch/m32r/boot/compressed/install.sh b/arch/m32r/boot/compressed/install.sh index 6d72e9e7269..16e5a0a1343 100644 --- a/arch/m32r/boot/compressed/install.sh +++ b/arch/m32r/boot/compressed/install.sh @@ -24,8 +24,8 @@ # User may have a custom install script -if [ -x /sbin/installkernel ]; then - exec /sbin/installkernel "$@" +if [ -x /sbin/${INSTALLKERNEL} ]; then + exec /sbin/${INSTALLKERNEL} "$@" fi if [ "$2" = "zImage" ]; then diff --git a/arch/m32r/include/asm/mmu_context.h b/arch/m32r/include/asm/mmu_context.h index 91909e5dd9d..a70a3df3363 100644 --- a/arch/m32r/include/asm/mmu_context.h +++ b/arch/m32r/include/asm/mmu_context.h @@ -127,7 +127,7 @@ static inline void switch_mm(struct mm_struct *prev, if (prev != next) { #ifdef CONFIG_SMP - cpu_set(cpu, next->cpu_vm_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); #endif /* CONFIG_SMP */ /* Set MPTB = next->pgd */ *(volatile unsigned long *)MPTB = (unsigned long)next->pgd; @@ -135,7 +135,7 @@ static inline void switch_mm(struct mm_struct *prev, } #ifdef CONFIG_SMP else - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) activate_context(next); #endif /* CONFIG_SMP */ } diff --git a/arch/m32r/include/asm/smp.h b/arch/m32r/include/asm/smp.h index b96a6d2ffbc..e67ded1aab9 100644 --- a/arch/m32r/include/asm/smp.h +++ b/arch/m32r/include/asm/smp.h @@ -88,7 +88,7 @@ extern void smp_send_timer(void); extern unsigned long send_IPI_mask_phys(cpumask_t, int, int); extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #endif /* not __ASSEMBLY__ */ diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c index fce57e5d3f9..6c42d5f8df5 100644 --- a/arch/m32r/kernel/init_task.c +++ b/arch/m32r/kernel/init_task.c @@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 929e5c9d3ad..1b7598e6f6e 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -85,7 +85,7 @@ void smp_ipi_timer_interrupt(struct pt_regs *); void smp_local_timer_interrupt(void); static void send_IPI_allbutself(int, int); -static void send_IPI_mask(cpumask_t, int, int); +static void send_IPI_mask(const struct cpumask *, int, int); unsigned long send_IPI_mask_phys(cpumask_t, int, int); /*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*/ @@ -113,7 +113,7 @@ unsigned long send_IPI_mask_phys(cpumask_t, int, int); void smp_send_reschedule(int cpu_id) { WARN_ON(cpu_is_offline(cpu_id)); - send_IPI_mask(cpumask_of_cpu(cpu_id), RESCHEDULE_IPI, 1); + send_IPI_mask(cpumask_of(cpu_id), RESCHEDULE_IPI, 1); } /*==========================================================================* @@ -168,7 +168,7 @@ void smp_flush_cache_all(void) spin_lock(&flushcache_lock); mask=cpus_addr(cpumask); atomic_set_mask(*mask, (atomic_t *)&flushcache_cpumask); - send_IPI_mask(cpumask, INVALIDATE_CACHE_IPI, 0); + send_IPI_mask(&cpumask, INVALIDATE_CACHE_IPI, 0); _flush_cache_copyback_all(); while (flushcache_cpumask) mb(); @@ -264,7 +264,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); cpu_id = smp_processor_id(); mmc = &mm->context[cpu_id]; - cpu_mask = mm->cpu_vm_mask; + cpu_mask = *mm_cpumask(mm); cpu_clear(cpu_id, cpu_mask); if (*mmc != NO_CONTEXT) { @@ -273,7 +273,7 @@ void smp_flush_tlb_mm(struct mm_struct *mm) if (mm == current->mm) activate_context(mm); else - cpu_clear(cpu_id, mm->cpu_vm_mask); + cpumask_clear_cpu(cpu_id, mm_cpumask(mm)); local_irq_restore(flags); } if (!cpus_empty(cpu_mask)) @@ -334,7 +334,7 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long va) preempt_disable(); cpu_id = smp_processor_id(); mmc = &mm->context[cpu_id]; - cpu_mask = mm->cpu_vm_mask; + cpu_mask = *mm_cpumask(mm); cpu_clear(cpu_id, cpu_mask); #ifdef DEBUG_SMP @@ -424,7 +424,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_IPI, 0); + send_IPI_mask(&cpumask, INVALIDATE_TLB_IPI, 0); while (!cpus_empty(flush_cpumask)) { /* nothing. lockup detection does not belong here */ @@ -469,7 +469,7 @@ void smp_invalidate_interrupt(void) if (flush_mm == current->active_mm) activate_context(flush_mm); else - cpu_clear(cpu_id, flush_mm->cpu_vm_mask); + cpumask_clear_cpu(cpu_id, mm_cpumask(flush_mm)); } else { unsigned long va = flush_va; @@ -546,14 +546,14 @@ static void stop_this_cpu(void *dummy) for ( ; ; ); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { send_IPI_mask(mask, CALL_FUNCTION_IPI, 0); } void arch_send_call_function_single_ipi(int cpu) { - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_IPI, 0); + send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_IPI, 0); } /*==========================================================================* @@ -729,7 +729,7 @@ static void send_IPI_allbutself(int ipi_num, int try) cpumask = cpu_online_map; cpu_clear(smp_processor_id(), cpumask); - send_IPI_mask(cpumask, ipi_num, try); + send_IPI_mask(&cpumask, ipi_num, try); } /*==========================================================================* @@ -752,7 +752,7 @@ static void send_IPI_allbutself(int ipi_num, int try) * ---------- --- -------------------------------------------------------- * *==========================================================================*/ -static void send_IPI_mask(cpumask_t cpumask, int ipi_num, int try) +static void send_IPI_mask(const struct cpumask *cpumask, int ipi_num, int try) { cpumask_t physid_mask, tmp; int cpu_id, phys_id; @@ -761,11 +761,11 @@ static void send_IPI_mask(cpumask_t cpumask, int ipi_num, int try) if (num_cpus <= 1) /* NO MP */ return; - cpus_and(tmp, cpumask, cpu_online_map); - BUG_ON(!cpus_equal(cpumask, tmp)); + cpumask_and(&tmp, cpumask, cpu_online_mask); + BUG_ON(!cpumask_equal(cpumask, &tmp)); physid_mask = CPU_MASK_NONE; - for_each_cpu_mask(cpu_id, cpumask){ + for_each_cpu(cpu_id, cpumask) { if ((phys_id = cpu_to_physid(cpu_id)) != -1) cpu_set(phys_id, physid_mask); } diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 655ea1c47a0..e034844cfc0 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -178,7 +178,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for (phys_id = 0 ; phys_id < nr_cpu ; phys_id++) physid_set(phys_id, phys_cpu_present_map); #ifndef CONFIG_HOTPLUG_CPU - cpu_present_map = cpu_possible_map; + init_cpu_present(&cpu_possible_map); #endif show_mp_info(nr_cpu); diff --git a/arch/m68k/install.sh b/arch/m68k/install.sh index 9c6bae6112e..57d640d4382 100644 --- a/arch/m68k/install.sh +++ b/arch/m68k/install.sh @@ -33,8 +33,8 @@ verify "$3" # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install - same as make zlilo diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 72bad65dba3..41230c595a8 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -42,9 +42,9 @@ */ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -union thread_union init_thread_union -__attribute__((section(".data.init_task"), aligned(THREAD_SIZE))) - = { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data + __attribute__((aligned(THREAD_SIZE))) = + { INIT_THREAD_INFO(init_task) }; /* initial task structure */ struct task_struct init_task = INIT_TASK(init_task); diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 7f54efaf60b..7deb402bfc7 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -20,7 +20,6 @@ #include <linux/syscalls.h> #include <linux/mman.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/ipc.h> #include <asm/setup.h> diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c index 45e97a207fe..cbf9dc3cc51 100644 --- a/arch/m68knommu/kernel/init_task.c +++ b/arch/m68knommu/kernel/init_task.c @@ -31,7 +31,6 @@ EXPORT_SYMBOL(init_task); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index 70028163862..efdd090778a 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -17,7 +17,6 @@ #include <linux/syscalls.h> #include <linux/mman.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/ipc.h> #include <linux/fs.h> diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 2db722d80d4..bbd8327f189 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration" config MICROBLAZE def_bool y select HAVE_LMB + select USB_ARCH_HAS_EHCI select ARCH_WANT_OPTIONAL_GPIOLIB config SWAP diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 8439598d465..34187354304 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -37,12 +37,12 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) # r31 holds current when in kernel mode -KBUILD_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2) +KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2) LDFLAGS := LDFLAGS_vmlinux := -LIBGCC := $(shell $(CC) $(KBUILD_KERNEL) -print-libgcc-file-name) +LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/microblaze/kernel/head.o libs-y += arch/microblaze/lib/ @@ -53,22 +53,41 @@ core-y += arch/microblaze/platform/ boot := arch/microblaze/boot +# Are we making a simpleImage.<boardname> target? If so, crack out the boardname +DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS))) + +ifneq ($(DTB),) + core-y += $(boot)/ +endif + # defines filename extension depending memory management type ifeq ($(CONFIG_MMU),) MMU := -nommu endif -export MMU +export MMU DTB all: linux.bin +BOOT_TARGETS = linux.bin linux.bin.gz simpleImage.% + archclean: $(Q)$(MAKE) $(clean)=$(boot) -linux.bin linux.bin.gz: vmlinux +$(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ define archhelp - echo '* linux.bin - Create raw binary' - echo ' linux.bin.gz - Create compressed raw binary' + echo '* linux.bin - Create raw binary' + echo ' linux.bin.gz - Create compressed raw binary' + echo ' simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in' + echo ' - stripped elf with fdt blob + echo ' simpleImage.<dt>.unstrip - full ELF image with fdt blob' + echo ' *_defconfig - Select default config from arch/microblaze/configs' + echo '' + echo ' Targets with <dt> embed a device tree blob inside the image' + echo ' These targets support board with firmware that does not' + echo ' support passing a device tree directly. Replace <dt> with the' + echo ' name of a dts file from the arch/microblaze/boot/dts/ directory' + echo ' (minus the .dts extension).' endef diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index c2bb043a029..21f13322a4c 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -2,10 +2,24 @@ # arch/microblaze/boot/Makefile # -targets := linux.bin linux.bin.gz +obj-y += linked_dtb.o + +targets := linux.bin linux.bin.gz simpleImage.% OBJCOPYFLAGS_linux.bin := -O binary +# Where the DTS files live +dtstree := $(srctree)/$(src)/dts + +# Ensure system.dtb exists +$(obj)/linked_dtb.o: $(obj)/system.dtb + +# Generate system.dtb from $(DTB).dtb +ifneq ($(DTB),system) +$(obj)/system.dtb: $(obj)/$(DTB).dtb + $(call if_changed,cp) +endif + $(obj)/linux.bin: vmlinux FORCE [ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \ touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image" @@ -16,4 +30,27 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE $(call if_changed,gzip) @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' -clean-kernel += linux.bin linux.bin.gz +quiet_cmd_cp = CP $< $@$2 + cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) + +quiet_cmd_strip = STRIP $@ + cmd_strip = $(STRIP) -K _start -K _end -K __log_buf -K _fdt_start vmlinux -o $@ + +$(obj)/simpleImage.%: vmlinux FORCE + $(call if_changed,cp,.unstrip) + $(call if_changed,strip) + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + +# Rule to build device tree blobs +DTC = $(objtree)/scripts/dtc/dtc + +# Rule to build device tree blobs +quiet_cmd_dtc = DTC $@ + cmd_dtc = $(DTC) -O dtb -o $(obj)/$*.dtb -b 0 -p 1024 $(dtstree)/$*.dts + +$(obj)/%.dtb: $(dtstree)/%.dts FORCE + $(call if_changed,dtc) + +clean-kernel += linux.bin linux.bin.gz simpleImage.* + +clean-files += *.dtb diff --git a/arch/microblaze/boot/dts/system.dts b/arch/microblaze/boot/dts/system.dts new file mode 120000 index 00000000000..7cb657892f2 --- /dev/null +++ b/arch/microblaze/boot/dts/system.dts @@ -0,0 +1 @@ +../../platform/generic/system.dts
\ No newline at end of file diff --git a/arch/microblaze/boot/linked_dtb.S b/arch/microblaze/boot/linked_dtb.S new file mode 100644 index 00000000000..cb2b537aebe --- /dev/null +++ b/arch/microblaze/boot/linked_dtb.S @@ -0,0 +1,3 @@ +.section __fdt_blob,"a" +.incbin "arch/microblaze/boot/system.dtb" + diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 09c32962b66..bb7c374713a 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.31-rc6 -# Tue Aug 18 11:00:02 2009 +# Linux kernel version: 2.6.31 +# Thu Sep 24 10:28:50 2009 # CONFIG_MICROBLAZE=y # CONFIG_SWAP is not set @@ -42,11 +42,12 @@ CONFIG_SYSVIPC_SYSCTL=y # # RCU Subsystem # -CONFIG_CLASSIC_RCU=y -# CONFIG_TREE_RCU is not set -# CONFIG_PREEMPT_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set # CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 @@ -260,6 +261,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set @@ -357,12 +359,10 @@ CONFIG_NET_ETHERNET=y # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_KS8842 is not set +CONFIG_XILINX_EMACLITE=y CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y - -# -# Wireless LAN -# +CONFIG_WLAN=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set @@ -460,6 +460,7 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y # CONFIG_DISPLAY_SUPPORT is not set # CONFIG_SOUND is not set # CONFIG_USB_SUPPORT is not set +CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -488,6 +489,7 @@ CONFIG_EXT2_FS=y # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_FILE_LOCKING=y CONFIG_FSNOTIFY=y # CONFIG_DNOTIFY is not set @@ -546,7 +548,6 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set -# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -671,18 +672,20 @@ CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_SYSCTL_SYSCALL_CHECK is not set # CONFIG_PAGE_POISONING is not set # CONFIG_SAMPLES is not set # CONFIG_KMEMCHECK is not set CONFIG_EARLY_PRINTK=y -CONFIG_HEART_BEAT=y +# CONFIG_HEART_BEAT is not set CONFIG_DEBUG_BOOTMEM=y # @@ -697,7 +700,6 @@ CONFIG_CRYPTO=y # # Crypto core or helper # -# CONFIG_CRYPTO_FIPS is not set # CONFIG_CRYPTO_MANAGER is not set # CONFIG_CRYPTO_MANAGER2 is not set # CONFIG_CRYPTO_GF128MUL is not set @@ -729,11 +731,13 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_HMAC is not set # CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set # # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_GHASH is not set # CONFIG_CRYPTO_MD4 is not set # CONFIG_CRYPTO_MD5 is not set # CONFIG_CRYPTO_MICHAEL_MIC is not set diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig index 8b638615a97..adb839bab70 100644 --- a/arch/microblaze/configs/nommu_defconfig +++ b/arch/microblaze/configs/nommu_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.31-rc6 -# Tue Aug 18 10:35:30 2009 +# Linux kernel version: 2.6.31 +# Thu Sep 24 10:29:43 2009 # CONFIG_MICROBLAZE=y # CONFIG_SWAP is not set @@ -44,11 +44,12 @@ CONFIG_BSD_PROCESS_ACCT_V3=y # # RCU Subsystem # -CONFIG_CLASSIC_RCU=y -# CONFIG_TREE_RCU is not set -# CONFIG_PREEMPT_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set # CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 @@ -243,6 +244,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set @@ -272,6 +274,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_AF_RXRPC is not set CONFIG_WIRELESS=y # CONFIG_CFG80211 is not set +CONFIG_CFG80211_DEFAULT_PS_VALUE=0 CONFIG_WIRELESS_OLD_REGULATORY=y # CONFIG_WIRELESS_EXT is not set # CONFIG_LIB80211 is not set @@ -279,7 +282,6 @@ CONFIG_WIRELESS_OLD_REGULATORY=y # # CFG80211 needs to be enabled for MAC80211 # -CONFIG_MAC80211_DEFAULT_PS_VALUE=0 # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -304,6 +306,7 @@ CONFIG_MTD_PARTITIONS=y # CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y +# CONFIG_MTD_OF_PARTS is not set # CONFIG_MTD_AR7_PARTS is not set # @@ -349,6 +352,7 @@ CONFIG_MTD_RAM=y # # CONFIG_MTD_COMPLEX_MAPPINGS is not set # CONFIG_MTD_PHYSMAP is not set +# CONFIG_MTD_PHYSMAP_OF is not set CONFIG_MTD_UCLINUX=y # CONFIG_MTD_PLATRAM is not set @@ -429,12 +433,10 @@ CONFIG_NET_ETHERNET=y # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_KS8842 is not set +# CONFIG_XILINX_EMACLITE is not set CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y - -# -# Wireless LAN -# +CONFIG_WLAN=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set @@ -535,7 +537,7 @@ CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB_ARCH_HAS_EHCI is not set +CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_USB is not set # CONFIG_USB_OTG_WHITELIST is not set # CONFIG_USB_OTG_BLACKLIST_HUB is not set @@ -579,6 +581,7 @@ CONFIG_FS_POSIX_ACL=y # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_FILE_LOCKING=y CONFIG_FSNOTIFY=y # CONFIG_DNOTIFY is not set @@ -639,7 +642,6 @@ CONFIG_ROMFS_BACKED_BY_BLOCK=y CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set -# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -710,18 +712,20 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y # CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_BACKTRACE_SELF_TEST is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # CONFIG_FAULT_INJECTION is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_PAGE_POISONING is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_EARLY_PRINTK=y -CONFIG_HEART_BEAT=y +# CONFIG_HEART_BEAT is not set # CONFIG_DEBUG_BOOTMEM is not set # @@ -736,7 +740,6 @@ CONFIG_CRYPTO=y # # Crypto core or helper # -# CONFIG_CRYPTO_FIPS is not set # CONFIG_CRYPTO_MANAGER is not set # CONFIG_CRYPTO_MANAGER2 is not set # CONFIG_CRYPTO_GF128MUL is not set @@ -768,11 +771,13 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_HMAC is not set # CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set # # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_GHASH is not set # CONFIG_CRYPTO_MD4 is not set # CONFIG_CRYPTO_MD5 is not set # CONFIG_CRYPTO_MICHAEL_MIC is not set diff --git a/arch/microblaze/include/asm/asm-compat.h b/arch/microblaze/include/asm/asm-compat.h new file mode 100644 index 00000000000..e7bc9dc11b5 --- /dev/null +++ b/arch/microblaze/include/asm/asm-compat.h @@ -0,0 +1,17 @@ +#ifndef _ASM_MICROBLAZE_ASM_COMPAT_H +#define _ASM_MICROBLAZE_ASM_COMPAT_H + +#include <asm/types.h> + +#ifdef __ASSEMBLY__ +# define stringify_in_c(...) __VA_ARGS__ +# define ASM_CONST(x) x +#else +/* This version of stringify will deal with commas... */ +# define __stringify_in_c(...) #__VA_ARGS__ +# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " " +# define __ASM_CONST(x) x##UL +# define ASM_CONST(x) __ASM_CONST(x) +#endif + +#endif /* _ASM_MICROBLAZE_ASM_COMPAT_H */ diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index 7c3ec13b44d..fc9997b73c0 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -210,6 +210,9 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size, #define in_be32(a) __raw_readl((const void __iomem __force *)(a)) #define in_be16(a) __raw_readw(a) +#define writel_be(v, a) out_be32((__force unsigned *)a, v) +#define readl_be(a) in_be32((__force unsigned *)a) + /* * Little endian */ diff --git a/arch/microblaze/include/asm/ipc.h b/arch/microblaze/include/asm/ipc.h deleted file mode 100644 index a46e3d9c2a3..00000000000 --- a/arch/microblaze/include/asm/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index 72aceae8868..880c988c223 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -17,6 +17,7 @@ #include <linux/pfn.h> #include <asm/setup.h> +#include <asm/asm-compat.h> #include <linux/const.h> #ifdef __KERNEL__ @@ -26,6 +27,8 @@ #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR)) + #ifndef __ASSEMBLY__ #define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index 27f8dafd8c3..ed67c9ed15b 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -38,7 +38,7 @@ extern void early_console_reg_tlb_alloc(unsigned int addr); void time_init(void); void init_IRQ(void); void machine_early_init(const char *cmdline, unsigned int ram, - unsigned int fdt); + unsigned int fdt, unsigned int msr); void machine_restart(char *cmd); void machine_shutdown(void); diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h new file mode 100644 index 00000000000..048dfcd8d89 --- /dev/null +++ b/arch/microblaze/include/asm/syscall.h @@ -0,0 +1,99 @@ +#ifndef __ASM_MICROBLAZE_SYSCALL_H +#define __ASM_MICROBLAZE_SYSCALL_H + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <asm/ptrace.h> + +/* The system call number is given by the user in R12 */ +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r12; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* TODO. */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->r3) ? regs->r3 : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r3; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + if (error) + regs->r3 = -error; + else + regs->r3 = val; +} + +static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs, + unsigned int n) +{ + switch (n) { + case 5: return regs->r10; + case 4: return regs->r9; + case 3: return regs->r8; + case 2: return regs->r7; + case 1: return regs->r6; + case 0: return regs->r5; + default: + BUG(); + } + return ~0; +} + +static inline void microblaze_set_syscall_arg(struct pt_regs *regs, + unsigned int n, + unsigned long val) +{ + switch (n) { + case 5: + regs->r10 = val; + case 4: + regs->r9 = val; + case 3: + regs->r8 = val; + case 2: + regs->r7 = val; + case 1: + regs->r6 = val; + case 0: + regs->r5 = val; + default: + BUG(); + } +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + while (n--) + *args++ = microblaze_get_syscall_arg(regs, i++); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + while (n--) + microblaze_set_syscall_arg(regs, i++, *args++); +} + +#endif /* __ASM_MICROBLAZE_SYSCALL_H */ diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c index c411c6757de..3539babc1c1 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo.c +++ b/arch/microblaze/kernel/cpu/cpuinfo.c @@ -28,6 +28,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = { {"7.10.d", 0x0b}, {"7.20.a", 0x0c}, {"7.20.b", 0x0d}, + {"7.20.c", 0x0e}, /* FIXME There is no keycode defined in MBV for these versions */ {"2.10.a", 0x10}, {"3.00.a", 0x20}, @@ -49,6 +50,8 @@ const struct family_string_key family_string_lookup[] = { {"spartan3a", 0xa}, {"spartan3an", 0xb}, {"spartan3adsp", 0xc}, + {"spartan6", 0xd}, + {"virtex6", 0xe}, /* FIXME There is no key code defined for spartan2 */ {"spartan2", 0xf0}, {NULL, 0}, diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index c7353e79f4a..acc1f05d1e2 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -308,38 +308,69 @@ C_ENTRY(_user_exception): swi r12, r1, PTO+PT_R0; tovirt(r1,r1) - la r15, r0, ret_from_trap-8 /* where the trap should return need -8 to adjust for rtsd r15, 8*/ /* Jump to the appropriate function for the system call number in r12 * (r12 is not preserved), or return an error if r12 is not valid. The LP * register should point to the location where * the called function should return. [note that MAKE_SYS_CALL uses label 1] */ - /* See if the system call number is valid. */ + + # Step into virtual mode. + set_vms; + addik r11, r0, 3f + rtid r11, 0 + nop +3: + add r11, r0, CURRENT_TASK /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO /* get thread info */ + lwi r11, r11, TI_FLAGS /* get flags in thread info */ + andi r11, r11, _TIF_WORK_SYSCALL_MASK + beqi r11, 4f + + addik r3, r0, -ENOSYS + swi r3, r1, PTO + PT_R3 + brlid r15, do_syscall_trace_enter + addik r5, r1, PTO + PT_R0 + + # do_syscall_trace_enter returns the new syscall nr. + addk r12, r0, r3 + lwi r5, r1, PTO+PT_R5; + lwi r6, r1, PTO+PT_R6; + lwi r7, r1, PTO+PT_R7; + lwi r8, r1, PTO+PT_R8; + lwi r9, r1, PTO+PT_R9; + lwi r10, r1, PTO+PT_R10; +4: +/* Jump to the appropriate function for the system call number in r12 + * (r12 is not preserved), or return an error if r12 is not valid. + * The LP register should point to the location where the called function + * should return. [note that MAKE_SYS_CALL uses label 1] */ + /* See if the system call number is valid */ addi r11, r12, -__NR_syscalls; - bgei r11,1f; + bgei r11,5f; /* Figure out which function to use for this system call. */ /* Note Microblaze barrel shift is optional, so don't rely on it */ add r12, r12, r12; /* convert num -> ptr */ add r12, r12, r12; /* Trac syscalls and stored them to r0_ram */ - lwi r3, r12, 0x400 + TOPHYS(r0_ram) + lwi r3, r12, 0x400 + r0_ram addi r3, r3, 1 - swi r3, r12, 0x400 + TOPHYS(r0_ram) + swi r3, r12, 0x400 + r0_ram + + # Find and jump into the syscall handler. + lwi r12, r12, sys_call_table + /* where the trap should return need -8 to adjust for rtsd r15, 8 */ + la r15, r0, ret_from_trap-8 + bra r12 - lwi r12, r12, TOPHYS(sys_call_table); /* Function ptr */ - /* Make the system call. to r12*/ - set_vms; - rtid r12, 0; - nop; /* The syscall number is invalid, return an error. */ -1: VM_ON; /* RETURN() expects virtual mode*/ +5: addi r3, r0, -ENOSYS; rtsd r15,8; /* looks like a normal subroutine return */ or r0, r0, r0 -/* Entry point used to return from a syscall/trap. */ +/* Entry point used to return from a syscall/trap */ /* We re-enable BIP bit before state restore */ C_ENTRY(ret_from_trap): set_bip; /* Ints masked for state restore*/ @@ -349,6 +380,23 @@ C_ENTRY(ret_from_trap): /* We're returning to user mode, so check for various conditions that * trigger rescheduling. */ + # FIXME: Restructure all these flag checks. + add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ + lwi r11, r11, TS_THREAD_INFO; /* get thread info */ + lwi r11, r11, TI_FLAGS; /* get flags in thread info */ + andi r11, r11, _TIF_WORK_SYSCALL_MASK + beqi r11, 1f + + swi r3, r1, PTO + PT_R3 + swi r4, r1, PTO + PT_R4 + brlid r15, do_syscall_trace_leave + addik r5, r1, PTO + PT_R0 + lwi r3, r1, PTO + PT_R3 + lwi r4, r1, PTO + PT_R4 +1: + + /* We're returning to user mode, so check for various conditions that + * trigger rescheduling. */ /* Get current task ptr into r11 */ add r11, r0, CURRENT_TASK; /* Get current task ptr into r11 */ lwi r11, r11, TS_THREAD_INFO; /* get thread info */ diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index 0cb64a31e89..d9f70f83097 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -72,7 +72,8 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, #endif #if 0 - printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n", + printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x " \ + "ESR=%08x\n", type, user_mode(regs) ? "user" : "kernel", fsr, (unsigned int) regs->pc, (unsigned int) regs->esr); #endif @@ -80,42 +81,50 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, switch (type & 0x1F) { case MICROBLAZE_ILL_OPCODE_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Illegal opcode exception in user mode.\n"); + pr_debug(KERN_WARNING "Illegal opcode exception " \ + "in user mode.\n"); _exception(SIGILL, regs, ILL_ILLOPC, addr); return; } - printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n"); + printk(KERN_WARNING "Illegal opcode exception " \ + "in kernel mode.\n"); die("opcode exception", regs, SIGBUS); break; case MICROBLAZE_IBUS_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Instruction bus error exception in user mode.\n"); + pr_debug(KERN_WARNING "Instruction bus error " \ + "exception in user mode.\n"); _exception(SIGBUS, regs, BUS_ADRERR, addr); return; } - printk(KERN_WARNING "Instruction bus error exception in kernel mode.\n"); + printk(KERN_WARNING "Instruction bus error exception " \ + "in kernel mode.\n"); die("bus exception", regs, SIGBUS); break; case MICROBLAZE_DBUS_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Data bus error exception in user mode.\n"); + pr_debug(KERN_WARNING "Data bus error exception " \ + "in user mode.\n"); _exception(SIGBUS, regs, BUS_ADRERR, addr); return; } - printk(KERN_WARNING "Data bus error exception in kernel mode.\n"); + printk(KERN_WARNING "Data bus error exception " \ + "in kernel mode.\n"); die("bus exception", regs, SIGBUS); break; case MICROBLAZE_DIV_ZERO_EXCEPTION: if (user_mode(regs)) { - printk(KERN_WARNING "Divide by zero exception in user mode\n"); - _exception(SIGILL, regs, ILL_ILLOPC, addr); + pr_debug(KERN_WARNING "Divide by zero exception " \ + "in user mode\n"); + _exception(SIGILL, regs, FPE_INTDIV, addr); return; } - printk(KERN_WARNING "Divide by zero exception in kernel mode.\n"); + printk(KERN_WARNING "Divide by zero exception " \ + "in kernel mode.\n"); die("Divide by exception", regs, SIGBUS); break; case MICROBLAZE_FPU_EXCEPTION: - printk(KERN_WARNING "FPU exception\n"); + pr_debug(KERN_WARNING "FPU exception\n"); /* IEEE FP exception */ /* I removed fsr variable and use code var for storing fsr */ if (fsr & FSR_IO) @@ -133,7 +142,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, #ifdef CONFIG_MMU case MICROBLAZE_PRIVILEGED_EXCEPTION: - printk(KERN_WARNING "Privileged exception\n"); + pr_debug(KERN_WARNING "Privileged exception\n"); /* "brk r0,r0" - used as debug breakpoint */ if (get_user(code, (unsigned long *)regs->pc) == 0 && code == 0x980c0000) { diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S index e41c6ce2a7b..697ce3007f3 100644 --- a/arch/microblaze/kernel/head.S +++ b/arch/microblaze/kernel/head.S @@ -54,6 +54,16 @@ ENTRY(_start) mfs r1, rmsr andi r1, r1, ~2 mts rmsr, r1 +/* + * Here is checking mechanism which check if Microblaze has msr instructions + * We load msr and compare it with previous r1 value - if is the same, + * msr instructions works if not - cpu don't have them. + */ + /* r8=0 - I have msr instr, 1 - I don't have them */ + rsubi r0, r0, 1 /* set the carry bit */ + msrclr r0, 0x4 /* try to clear it */ + /* read the carry bit, r8 will be '0' if msrclr exists */ + addik r8, r0, 0 /* r7 may point to an FDT, or there may be one linked in. if it's in r7, we've got to save it away ASAP. @@ -209,8 +219,8 @@ start_here: * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for * the function. */ - la r8, r0, machine_early_init - brald r15, r8 + la r9, r0, machine_early_init + brald r15, r9 nop #ifndef CONFIG_MMU diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S index 3288c973767..6b0288ebccd 100644 --- a/arch/microblaze/kernel/hw_exception_handler.S +++ b/arch/microblaze/kernel/hw_exception_handler.S @@ -84,9 +84,10 @@ #define NUM_TO_REG(num) r ## num #ifdef CONFIG_MMU -/* FIXME you can't change first load of MSR because there is - * hardcoded jump bri 4 */ #define RESTORE_STATE \ + lwi r5, r1, 0; \ + mts rmsr, r5; \ + nop; \ lwi r3, r1, PT_R3; \ lwi r4, r1, PT_R4; \ lwi r5, r1, PT_R5; \ @@ -309,6 +310,9 @@ _hw_exception_handler: lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */ #endif + mfs r5, rmsr; + nop + swi r5, r1, 0; mfs r3, resr nop mfs r4, rear; @@ -380,6 +384,8 @@ handle_other_ex: /* Handle Other exceptions here */ addk r8, r17, r0; /* Load exception address */ bralid r15, full_exception; /* Branch to the handler */ nop; + mts r0, rfsr; /* Clear sticky fsr */ + nop /* * Trigger execution of the signal handler by enabling diff --git a/arch/microblaze/kernel/init_task.c b/arch/microblaze/kernel/init_task.c index 67da22579b6..b5d711f94ff 100644 --- a/arch/microblaze/kernel/init_task.c +++ b/arch/microblaze/kernel/init_task.c @@ -19,9 +19,8 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = -{ INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; struct task_struct init_task = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 00b12c6d532..4201c743cc9 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -235,6 +235,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) regs->pc = pc; regs->r1 = usp; regs->pt_mode = 0; + regs->msr |= MSR_UMS; } #ifdef CONFIG_MMU diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 53ff39af6a5..4b3ac32754d 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -29,6 +29,10 @@ #include <linux/sched.h> #include <linux/ptrace.h> #include <linux/signal.h> +#include <linux/elf.h> +#include <linux/audit.h> +#include <linux/seccomp.h> +#include <linux/tracehook.h> #include <linux/errno.h> #include <asm/processor.h> @@ -174,6 +178,64 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return rval; } +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) +{ + long ret = 0; + + secure_computing(regs->r12); + + if (test_thread_flag(TIF_SYSCALL_TRACE) && + tracehook_report_syscall_entry(regs)) + /* + * Tracing decided this syscall should not happen. + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in regs->regs[0]. + */ + ret = -1L; + + if (unlikely(current->audit_context)) + audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12, + regs->r5, regs->r6, + regs->r7, regs->r8); + + return ret ?: regs->r12; +} + +asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) +{ + int step; + + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3); + + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, step); +} + +#if 0 +static asmlinkage void syscall_trace(void) +{ + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + if (!(current->ptrace & PT_PTRACED)) + return; + /* The 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ + ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) + ? 0x80 : 0)); + /* + * this isn't the same as continuing with a signal, but it will do + * for normal use. strace only continues with a signal if the + * stopping signal is not SIGTRAP. -brl + */ + if (current->exit_code) { + send_sig(current->exit_code, current, 1); + current->exit_code = 0; + } +} +#endif + void ptrace_disable(struct task_struct *child) { /* nothing to do */ diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 2a97bf513b6..8c1e0f4dcf1 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -94,7 +94,7 @@ inline unsigned get_romfs_len(unsigned *addr) #endif /* CONFIG_MTD_UCLINUX_EBSS */ void __init machine_early_init(const char *cmdline, unsigned int ram, - unsigned int fdt) + unsigned int fdt, unsigned int msr) { unsigned long *src, *dst = (unsigned long *)0x0; @@ -157,6 +157,16 @@ void __init machine_early_init(const char *cmdline, unsigned int ram, early_printk("New klimit: 0x%08x\n", (unsigned)klimit); #endif +#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR + if (msr) + early_printk("!!!Your kernel has setup MSR instruction but " + "CPU don't have it %d\n", msr); +#else + if (!msr) + early_printk("!!!Your kernel not setup MSR instruction but " + "CPU have it %d\n", msr); +#endif + for (src = __ivt_start; src < __ivt_end; src++, dst++) *dst = *src; diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index b96f1682bb2..07cabed4b94 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -23,7 +23,6 @@ #include <linux/mman.h> #include <linux/sys.h> #include <linux/ipc.h> -#include <linux/utsname.h> #include <linux/file.h> #include <linux/module.h> #include <linux/err.h> diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index ec5fa91a48d..e704188d785 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -12,13 +12,16 @@ OUTPUT_FORMAT("elf32-microblaze", "elf32-microblaze", "elf32-microblaze") OUTPUT_ARCH(microblaze) ENTRY(_start) +#include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <asm/thread_info.h> jiffies = jiffies_64 + 4; SECTIONS { . = CONFIG_KERNEL_START; - .text : { + _start = CONFIG_KERNEL_BASE_ADDR; + .text : AT(ADDR(.text) - LOAD_OFFSET) { _text = . ; _stext = . ; *(.text .text.*) @@ -33,24 +36,22 @@ SECTIONS { } . = ALIGN (4) ; - _fdt_start = . ; /* place for fdt blob */ - . = . + 0x4000; - _fdt_end = . ; + __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) { + _fdt_start = . ; /* place for fdt blob */ + *(__fdt_blob) ; /* Any link-placed DTB */ + . = _fdt_start + 0x4000; /* Pad up to 16kbyte */ + _fdt_end = . ; + } . = ALIGN(16); RODATA - . = ALIGN(16); - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } + EXCEPTION_TABLE(16) /* * sdata2 section can go anywhere, but must be word aligned * and SDA2_BASE must point to the middle of it */ - .sdata2 : { + .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) { _ssrw = .; . = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */ *(.sdata2) @@ -61,12 +62,7 @@ SECTIONS { } _sdata = . ; - .data ALIGN (4096) : { /* page aligned when MMU used - origin 0x4 */ - DATA_DATA - CONSTRUCTORS - } - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } + RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE) _edata = . ; /* Reserve some low RAM for r0 based memory references */ @@ -74,18 +70,14 @@ SECTIONS { r0_ram = . ; . = . + 4096; /* a page should be enough */ - /* The initial task */ - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } - /* Under the microblaze ABI, .sdata and .sbss must be contiguous */ . = ALIGN(8); - .sdata : { + .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { _ssro = .; *(.sdata) } - .sbss : { + .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { _ssbss = .; *(.sbss) _esbss = .; @@ -96,47 +88,36 @@ SECTIONS { __init_begin = .; - . = ALIGN(4096); - .init.text : { - _sinittext = . ; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) - .init.data : { + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA } . = ALIGN(4); - .init.ivt : { + .init.ivt : AT(ADDR(.init.ivt) - LOAD_OFFSET) { __ivt_start = .; *(.init.ivt) __ivt_end = .; } - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + INIT_SETUP(0) } - .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET ) { + INIT_CALLS } - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + CON_INITCALL } SECURITY_INIT __init_end_before_initramfs = .; - .init.ramfs ALIGN(4096) : { + .init.ramfs ALIGN(4096) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { __initramfs_start = .; *(.init.ramfs) __initramfs_end = .; @@ -152,7 +133,8 @@ SECTIONS { } __init_end = .; - .bss ALIGN (4096) : { /* page aligned when MMU used */ + .bss ALIGN (4096) : AT(ADDR(.bss) - LOAD_OFFSET) { + /* page aligned when MMU used */ __bss_start = . ; *(.bss*) *(COMMON) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 1110784eb3f..a44892e7cd5 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -180,7 +180,8 @@ void free_initrd_mem(unsigned long start, unsigned long end) totalram_pages++; pages++; } - printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages); + printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", + (int)(pages * (PAGE_SIZE / 1024))); } #endif diff --git a/arch/mips/Makefile b/arch/mips/Makefile index c825b14b4ed..77f5021218d 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -627,16 +627,6 @@ endif cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic drivers-$(CONFIG_PCI) += arch/mips/pci/ -ifdef CONFIG_32BIT -ifdef CONFIG_CPU_LITTLE_ENDIAN -JIFFIES = jiffies_64 -else -JIFFIES = jiffies_64 + 4 -endif -else -JIFFIES = jiffies_64 -endif - # # Automatically detect the build format. By default we choose # the elf format according to the load address. @@ -660,8 +650,9 @@ ifdef CONFIG_64BIT endif KBUILD_AFLAGS += $(cflags-y) -KBUILD_CFLAGS += $(cflags-y) \ - -D"VMLINUX_LOAD_ADDRESS=$(load-y)" +KBUILD_CFLAGS += $(cflags-y) +KBUILD_CPPFLAGS += -D"VMLINUX_LOAD_ADDRESS=$(load-y)" +KBUILD_CPPFLAGS += -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)" LDFLAGS += -m $(ld-emul) @@ -676,18 +667,6 @@ endif OBJCOPYFLAGS += --remove-section=.reginfo -# -# Choosing incompatible machines durings configuration will result in -# error messages during linking. Select a default linkscript if -# none has been choosen above. -# - -CPPFLAGS_vmlinux.lds := \ - $(KBUILD_CFLAGS) \ - -D"LOADADDR=$(load-y)" \ - -D"JIFFIES=$(JIFFIES)" \ - -D"DATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)" - head-y := arch/mips/kernel/head.o arch/mips/kernel/init_task.o libs-y += arch/mips/lib/ diff --git a/arch/mips/alchemy/common/time.c b/arch/mips/alchemy/common/time.c index f34ff860194..379a664809b 100644 --- a/arch/mips/alchemy/common/time.c +++ b/arch/mips/alchemy/common/time.c @@ -88,7 +88,7 @@ static struct clock_event_device au1x_rtcmatch2_clockdev = { .irq = AU1000_RTC_MATCH2_INT, .set_next_event = au1x_rtcmatch2_set_next_event, .set_mode = au1x_rtcmatch2_set_mode, - .cpumask = CPU_MASK_ALL_PTR, + .cpumask = cpu_all_mask, }; static struct irqaction au1x_rtcmatch2_irqaction = { diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 23059170700..f6837422fe6 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -24,12 +24,10 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; #define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid) #define parent_node(node) (node) -#define node_to_cpumask(node) (hub_data(node)->h_cpus) #define cpumask_of_node(node) (&hub_data(node)->h_cpus) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); -#define pcibus_to_cpumask(bus) (cpu_online_map) #define cpumask_of_pcibus(bus) (cpu_online_mask) extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index d3bea88d874..d9743536a62 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -178,8 +178,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * Mark current->active_mm as not "active" anymore. * We don't want to mislead possible IPI tlb flush routines. */ - cpu_clear(cpu, prev->cpu_vm_mask); - cpu_set(cpu, next->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); local_irq_restore(flags); } @@ -235,8 +235,8 @@ activate_mm(struct mm_struct *prev, struct mm_struct *next) TLBMISS_HANDLER_SETUP_PGD(next->pgd); /* mark mmu ownership change */ - cpu_clear(cpu, prev->cpu_vm_mask); - cpu_set(cpu, next->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); local_irq_restore(flags); } @@ -258,7 +258,7 @@ drop_mmu_context(struct mm_struct *mm, unsigned cpu) local_irq_save(flags); - if (cpu_isset(cpu, mm->cpu_vm_mask)) { + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { get_new_mmu_context(mm, cpu); #ifdef CONFIG_MIPS_MT_SMTC /* See comments for similar code above */ diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index fd545547b8a..9e09af34c8a 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -19,7 +19,7 @@ struct task_struct; struct plat_smp_ops { void (*send_ipi_single)(int cpu, unsigned int action); - void (*send_ipi_mask)(cpumask_t mask, unsigned int action); + void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action); void (*init_secondary)(void); void (*smp_finish)(void); void (*cpus_done)(void); diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index aaa2d4ab26d..e15f11a0931 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -78,6 +78,6 @@ extern void play_dead(void); extern asmlinkage void smp_call_function_interrupt(void); extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #endif /* __ASM_SMP_H */ diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c index 5b457a40c78..6d6ca530589 100644 --- a/arch/mips/kernel/init_task.c +++ b/arch/mips/kernel/init_task.c @@ -21,9 +21,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * * The things we do for performance.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"), - __aligned__(THREAD_SIZE))) = +union thread_union init_thread_union __init_task_data + __attribute__((__aligned__(THREAD_SIZE))) = { INIT_THREAD_INFO(init_task) }; /* diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index ad0ff5dc4d5..cc81771b882 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -80,11 +80,11 @@ void cmp_send_ipi_single(int cpu, unsigned int action) local_irq_restore(flags); } -static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action) +static void cmp_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) cmp_send_ipi_single(i, action); } @@ -171,7 +171,7 @@ void __init cmp_smp_setup(void) for (i = 1; i < NR_CPUS; i++) { if (amon_cpu_avail(i)) { - cpu_set(i, cpu_possible_map); + set_cpu_possible(i, true); __cpu_number_map[i] = ++ncpu; __cpu_logical_map[ncpu] = i; } diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 6f7ee5ac46e..43e7cdc5ded 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0, write_vpe_c0_vpeconf0(tmp); /* Record this as available CPU */ - cpu_set(tc, cpu_possible_map); + set_cpu_possible(tc, true); __cpu_number_map[tc] = ++ncpu; __cpu_logical_map[ncpu] = tc; } @@ -141,11 +141,11 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action) local_irq_restore(flags); } -static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action) +static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) vsmp_send_ipi_single(i, action); } diff --git a/arch/mips/kernel/smp-up.c b/arch/mips/kernel/smp-up.c index 2508d55d68f..00500fea275 100644 --- a/arch/mips/kernel/smp-up.c +++ b/arch/mips/kernel/smp-up.c @@ -18,7 +18,8 @@ static void up_send_ipi_single(int cpu, unsigned int action) panic(KERN_ERR "%s called", __func__); } -static inline void up_send_ipi_mask(cpumask_t mask, unsigned int action) +static inline void up_send_ipi_mask(const struct cpumask *mask, + unsigned int action) { panic(KERN_ERR "%s called", __func__); } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 64668a93248..4eb106c6a3e 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -128,7 +128,7 @@ asmlinkage __cpuinit void start_secondary(void) cpu_idle(); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION); } @@ -183,15 +183,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus) mp_ops->prepare_cpus(max_cpus); set_cpu_sibling_map(0); #ifndef CONFIG_HOTPLUG_CPU - cpu_present_map = cpu_possible_map; + init_cpu_present(&cpu_possible_map); #endif } /* preload SMP state for boot cpu */ void __devinit smp_prepare_boot_cpu(void) { - cpu_set(0, cpu_possible_map); - cpu_set(0, cpu_online_map); + set_cpu_possible(0, true); + set_cpu_online(0, true); cpu_set(0, cpu_callin_map); } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 1a466baf0ed..67153a0dc26 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -305,7 +305,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot) */ ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) { - cpu_set(i, cpu_possible_map); + set_cpu_possible(i, true); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; } @@ -525,8 +525,8 @@ void smtc_prepare_cpus(int cpus) * Pull any physically present but unused TCs out of circulation. */ while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) { - cpu_clear(tc, cpu_possible_map); - cpu_clear(tc, cpu_present_map); + set_cpu_possible(tc, false); + set_cpu_present(tc, false); tc++; } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 2769bed3d2a..9bf0e3df7c5 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -10,7 +10,16 @@ PHDRS { text PT_LOAD FLAGS(7); /* RWX */ note PT_NOTE FLAGS(4); /* R__ */ } -jiffies = JIFFIES; + +ifdef CONFIG_32BIT + ifdef CONFIG_CPU_LITTLE_ENDIAN + jiffies = jiffies_64; + else + jiffies = jiffies_64 + 4; + endif +else + jiffies = jiffies_64; +endif SECTIONS { @@ -29,7 +38,7 @@ SECTIONS /* . = 0xa800000000300000; */ . = 0xffffffff80300000; #endif - . = LOADADDR; + . = VMLINUX_LOAD_ADDRESS; /* read-only */ _text = .; /* Text and read-only data */ .text : { diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 3f04d4c406b..b3deed8db61 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table, /* And the same for proc */ -int proc_dolasatstring(ctl_table *table, int write, struct file *filp, +int proc_dolasatstring(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dostring(table, write, filp, buffer, lenp, ppos); + r = proc_dostring(table, write, buffer, lenp, ppos); if ((!write) || r) return r; @@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp, } /* proc function to write EEPROM after changing int entry */ -int proc_dolasatint(ctl_table *table, int write, struct file *filp, +int proc_dolasatint(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if ((!write) || r) return r; @@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp, static int rtctmp; /* proc function to read/write RealTime Clock */ -int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, +int proc_dolasatrtc(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct timespec ts; @@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, if (rtctmp < 0) rtctmp = 0; } - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if (r) return r; @@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table, #endif #ifdef CONFIG_INET -int proc_lasat_ip(ctl_table *table, int write, struct file *filp, +int proc_lasat_ip(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { unsigned int ip; @@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table, return 0; } -int proc_lasat_prid(ctl_table *table, int write, struct file *filp, +int proc_lasat_prid(ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int r; - r = proc_dointvec(table, write, filp, buffer, lenp, ppos); + r = proc_dointvec(table, write, buffer, lenp, ppos); if (r < 0) return r; if (write) { diff --git a/arch/mips/mipssim/sim_smtc.c b/arch/mips/mipssim/sim_smtc.c index d6e4f656ad1..5da30b6a65b 100644 --- a/arch/mips/mipssim/sim_smtc.c +++ b/arch/mips/mipssim/sim_smtc.c @@ -43,11 +43,12 @@ static void ssmtc_send_ipi_single(int cpu, unsigned int action) /* "CPU" may be TC of same VPE, VPE of same CPU, or different CPU */ } -static inline void ssmtc_send_ipi_mask(cpumask_t mask, unsigned int action) +static inline void ssmtc_send_ipi_mask(const struct cpumask *mask, + unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) ssmtc_send_ipi_single(i, action); } diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index 10ab69f7183..94e05e5733c 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -79,7 +79,7 @@ static void octeon_flush_icache_all_cores(struct vm_area_struct *vma) * cores it has been used on */ if (vma) - mask = vma->vm_mm->cpu_vm_mask; + mask = *mm_cpumask(vma->vm_mm); else mask = cpu_online_map; cpu_clear(cpu, mask); diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index 499ffe5475d..192cfd2a539 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -21,11 +21,11 @@ static void msmtc_send_ipi_single(int cpu, unsigned int action) smtc_send_ipi(cpu, LINUX_SMP_IPI, action); } -static void msmtc_send_ipi_mask(cpumask_t mask, unsigned int action) +static void msmtc_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) msmtc_send_ipi_single(i, action); } diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index 8ace2771623..326fe7a392e 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -97,11 +97,11 @@ static void yos_send_ipi_single(int cpu, unsigned int action) } } -static void yos_send_ipi_mask(cpumask_t mask, unsigned int action) +static void yos_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) yos_send_ipi_single(i, action); } diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 060d853d7b3..f61c164d1e6 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -421,7 +421,7 @@ static void __init node_mem_init(cnodeid_t node) /* * A node with nothing. We use it to avoid any special casing in - * node_to_cpumask + * cpumask_of_node */ static struct node_data null_node = { .hub = { diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index cbcd7eb83bd..9aa8f2951df 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -165,11 +165,11 @@ static void ip27_send_ipi_single(int destid, unsigned int action) REMOTE_HUB_SEND_INTR(COMPACT_TO_NASID_NODEID(cpu_to_node(destid)), irq); } -static void ip27_send_ipi_mask(cpumask_t mask, unsigned int action) +static void ip27_send_ipi(const struct cpumask *mask, unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) ip27_send_ipi_single(i, action); } diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 314691648c9..47b347c992e 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -82,11 +82,12 @@ static void bcm1480_send_ipi_single(int cpu, unsigned int action) __raw_writeq((((u64)action)<< 48), mailbox_0_set_regs[cpu]); } -static void bcm1480_send_ipi_mask(cpumask_t mask, unsigned int action) +static void bcm1480_send_ipi_mask(const struct cpumask *mask, + unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) bcm1480_send_ipi_single(i, action); } diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index cad14003b84..c00a5cb1128 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -70,11 +70,12 @@ static void sb1250_send_ipi_single(int cpu, unsigned int action) __raw_writeq((((u64)action) << 48), mailbox_set_regs[cpu]); } -static inline void sb1250_send_ipi_mask(cpumask_t mask, unsigned int action) +static inline void sb1250_send_ipi_mask(const struct cpumask *mask, + unsigned int action) { unsigned int i; - for_each_cpu_mask(i, mask) + for_each_cpu(i, mask) sb1250_send_ipi_single(i, action); } diff --git a/arch/mn10300/include/asm/mmu_context.h b/arch/mn10300/include/asm/mmu_context.h index a9e2e34f69b..cb294c244de 100644 --- a/arch/mn10300/include/asm/mmu_context.h +++ b/arch/mn10300/include/asm/mmu_context.h @@ -38,13 +38,13 @@ extern unsigned long mmu_context_cache[NR_CPUS]; #define enter_lazy_tlb(mm, tsk) do {} while (0) #ifdef CONFIG_SMP -#define cpu_ran_vm(cpu, task) \ - cpu_set((cpu), (task)->cpu_vm_mask) -#define cpu_maybe_ran_vm(cpu, task) \ - cpu_test_and_set((cpu), (task)->cpu_vm_mask) +#define cpu_ran_vm(cpu, mm) \ + cpumask_set_cpu((cpu), mm_cpumask(mm)) +#define cpu_maybe_ran_vm(cpu, mm) \ + cpumask_test_and_set_cpu((cpu), mm_cpumask(mm)) #else -#define cpu_ran_vm(cpu, task) do {} while (0) -#define cpu_maybe_ran_vm(cpu, task) true +#define cpu_ran_vm(cpu, mm) do {} while (0) +#define cpu_maybe_ran_vm(cpu, mm) true #endif /* CONFIG_SMP */ /* diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c index 80d423b80af..a481b043bea 100644 --- a/arch/mn10300/kernel/init_task.c +++ b/arch/mn10300/kernel/init_task.c @@ -27,9 +27,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index 3e52a105432..8ca5af00334 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -19,7 +19,6 @@ #include <linux/stat.h> #include <linux/mman.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/tty.h> #include <asm/uaccess.h> diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index da6f66901c9..55cca1dac43 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -118,8 +118,8 @@ define archhelp @echo '* vmlinux - Uncompressed kernel image (./vmlinux)' @echo ' palo - Bootable image (./lifimage)' @echo ' install - Install kernel using' - @echo ' (your) ~/bin/installkernel or' - @echo ' (distribution) /sbin/installkernel or' + @echo ' (your) ~/bin/$(INSTALLKERNEL) or' + @echo ' (distribution) /sbin/$(INSTALLKERNEL) or' @echo ' copy to $$(INSTALL_PATH)' endef diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h index 1e1c824764e..5f39d5597ce 100644 --- a/arch/parisc/include/asm/fcntl.h +++ b/arch/parisc/include/asm/fcntl.h @@ -28,6 +28,8 @@ #define F_SETOWN 12 /* for sockets. */ #define F_SETSIG 13 /* for sockets. */ #define F_GETSIG 14 /* for sockets. */ +#define F_GETOWN_EX 15 +#define F_SETOWN_EX 16 /* for posix fcntl() and lockf() */ #define F_RDLCK 01 diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h index 21eb45a5262..2e73623feb6 100644 --- a/arch/parisc/include/asm/smp.h +++ b/arch/parisc/include/asm/smp.h @@ -30,7 +30,6 @@ extern void smp_send_all_nop(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask #endif /* !ASSEMBLY */ diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index 9632b3e164c..e593fc8d58b 100644 --- a/arch/parisc/install.sh +++ b/arch/parisc/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c index 82974b20fc1..d020eae6525 100644 --- a/arch/parisc/kernel/init_task.c +++ b/arch/parisc/kernel/init_task.c @@ -43,8 +43,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((aligned(128))) __attribute__((__section__(".data.init_task"))) = +union thread_union init_thread_union __init_task_data + __attribute__((aligned(128))) = { INIT_THREAD_INFO(init_task) }; #if PT_NLEVELS == 3 diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c index 92a0acaa0d1..561388b17c9 100644 --- a/arch/parisc/kernel/sys_parisc32.c +++ b/arch/parisc/kernel/sys_parisc32.c @@ -18,7 +18,6 @@ #include <linux/signal.h> #include <linux/resource.h> #include <linux/times.h> -#include <linux/utsname.h> #include <linux/time.h> #include <linux/smp.h> #include <linux/smp_lock.h> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 952a3963e9e..aacf629c1a9 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -158,8 +158,6 @@ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ # Default to zImage, override when needed all: zImage -CPPFLAGS_vmlinux.lds := -Upowerpc - BOOT_TARGETS = zImage zImage.initrd uImage zImage% dtbImage% treeImage.% cuImage.% simpleImage.% PHONY += $(BOOT_TARGETS) @@ -182,8 +180,8 @@ define archhelp @echo ' simpleImage.<dt> - Firmware independent image.' @echo ' treeImage.<dt> - Support for older IBM 4xx firmware (not U-Boot)' @echo ' install - Install kernel using' - @echo ' (your) ~/bin/installkernel or' - @echo ' (distribution) /sbin/installkernel or' + @echo ' (your) ~/bin/$(INSTALLKERNEL) or' + @echo ' (distribution) /sbin/$(INSTALLKERNEL) or' @echo ' install to $$(INSTALL_PATH) and run lilo' @echo ' *_defconfig - Select default config from arch/$(ARCH)/configs' @echo '' diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh index 98312d169c8..b6a256bc96e 100644 --- a/arch/powerpc/boot/install.sh +++ b/arch/powerpc/boot/install.sh @@ -23,8 +23,8 @@ set -e # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h new file mode 100644 index 00000000000..a67aeed17d4 --- /dev/null +++ b/arch/powerpc/include/asm/fsldma.h @@ -0,0 +1,136 @@ +/* + * Freescale MPC83XX / MPC85XX DMA Controller + * + * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__ +#define __ARCH_POWERPC_ASM_FSLDMA_H__ + +#include <linux/dmaengine.h> + +/* + * Definitions for the Freescale DMA controller's DMA_SLAVE implemention + * + * The Freescale DMA_SLAVE implementation was designed to handle many-to-many + * transfers. An example usage would be an accelerated copy between two + * scatterlists. Another example use would be an accelerated copy from + * multiple non-contiguous device buffers into a single scatterlist. + * + * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This + * structure contains a list of hardware addresses that should be copied + * to/from the scatterlist passed into device_prep_slave_sg(). The structure + * also has some fields to enable hardware-specific features. + */ + +/** + * struct fsl_dma_hw_addr + * @entry: linked list entry + * @address: the hardware address + * @length: length to transfer + * + * Holds a single physical hardware address / length pair for use + * with the DMAEngine DMA_SLAVE API. + */ +struct fsl_dma_hw_addr { + struct list_head entry; + + dma_addr_t address; + size_t length; +}; + +/** + * struct fsl_dma_slave + * @addresses: a linked list of struct fsl_dma_hw_addr structures + * @request_count: value for DMA request count + * @src_loop_size: setup and enable constant source-address DMA transfers + * @dst_loop_size: setup and enable constant destination address DMA transfers + * @external_start: enable externally started DMA transfers + * @external_pause: enable externally paused DMA transfers + * + * Holds a list of address / length pairs for use with the DMAEngine + * DMA_SLAVE API implementation for the Freescale DMA controller. + */ +struct fsl_dma_slave { + + /* List of hardware address/length pairs */ + struct list_head addresses; + + /* Support for extra controller features */ + unsigned int request_count; + unsigned int src_loop_size; + unsigned int dst_loop_size; + bool external_start; + bool external_pause; +}; + +/** + * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave + * @slave: the &struct fsl_dma_slave to add to + * @address: the hardware address to add + * @length: the length of bytes to transfer from @address + * + * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on + * success, -ERRNO otherwise. + */ +static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave, + dma_addr_t address, size_t length) +{ + struct fsl_dma_hw_addr *addr; + + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); + if (!addr) + return -ENOMEM; + + INIT_LIST_HEAD(&addr->entry); + addr->address = address; + addr->length = length; + + list_add_tail(&addr->entry, &slave->addresses); + return 0; +} + +/** + * fsl_dma_slave_free - free a struct fsl_dma_slave + * @slave: the struct fsl_dma_slave to free + * + * Free a struct fsl_dma_slave and all associated address/length pairs + */ +static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave) +{ + struct fsl_dma_hw_addr *addr, *tmp; + + if (slave) { + list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) { + list_del(&addr->entry); + kfree(addr); + } + + kfree(slave); + } +} + +/** + * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave + * @gfp: the flags to pass to kmalloc when allocating this structure + * + * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new + * struct fsl_dma_slave on success, or NULL on failure. + */ +static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp) +{ + struct fsl_dma_slave *slave; + + slave = kzalloc(sizeof(*slave), gfp); + if (!slave) + return NULL; + + INIT_LIST_HEAD(&slave->addresses); + return slave; +} + +#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index c0d3b8af931..d9ea8d39c34 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -146,7 +146,7 @@ extern void smp_generic_take_timebase(void); extern struct smp_ops_t *smp_ops; extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); /* Definitions relative to the secondary CPU spin loop * and entry point. Not all of them exist on both 32 and diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 394edcbcce7..22f738d12ad 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -17,11 +17,6 @@ static inline int cpu_to_node(int cpu) #define parent_node(node) (node) -static inline cpumask_t node_to_cpumask(int node) -{ - return numa_cpumask_lookup_table[node]; -} - #define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) int of_node_to_nid(struct device_node *device); @@ -36,11 +31,6 @@ static inline int pcibus_to_node(struct pci_bus *bus) } #endif -#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \ - CPU_MASK_ALL : \ - node_to_cpumask(pcibus_to_node(bus)) \ - ) - #define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) @@ -104,8 +94,6 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev, #ifdef CONFIG_PPC64 #include <asm/smp.h> -#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) -#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index ffc4253fef5..2375b7eb1c7 100644 --- a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -16,9 +16,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 49e705fcee6..040bd1de8d9 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -13,6 +13,7 @@ #include <linux/kexec.h> #include <linux/smp.h> #include <linux/thread_info.h> +#include <linux/init_task.h> #include <linux/errno.h> #include <asm/page.h> @@ -249,8 +250,8 @@ static void kexec_prepare_cpus(void) * We could use a smaller stack if we don't care about anything using * current, but that audit has not been performed. */ -static union thread_union kexec_stack - __attribute__((__section__(".data.init_task"))) = { }; +static union thread_union kexec_stack __init_task_data = + { }; /* Our assembly helper, in kexec_stub.S */ extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start, diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1d5570a1e45..4271f7a655a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -24,7 +24,6 @@ #include <linux/seq_file.h> #include <linux/ioport.h> #include <linux/console.h> -#include <linux/utsname.h> #include <linux/screen_info.h> #include <linux/root_dev.h> #include <linux/notifier.h> @@ -432,9 +431,9 @@ void __init smp_setup_cpu_maps(void) for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, intserv[j]); - cpu_set(cpu, cpu_present_map); + set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, intserv[j]); - cpu_set(cpu, cpu_possible_map); + set_cpu_possible(cpu, true); cpu++; } } @@ -480,7 +479,7 @@ void __init smp_setup_cpu_maps(void) maxcpus); for (cpu = 0; cpu < maxcpus; cpu++) - cpu_set(cpu, cpu_possible_map); + set_cpu_possible(cpu, true); out: of_node_put(dn); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index d387b3937cc..9b86a74d281 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -189,11 +189,11 @@ void arch_send_call_function_single_ipi(int cpu) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { unsigned int cpu; - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, mask) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); } @@ -287,7 +287,7 @@ void __devinit smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); - cpu_set(boot_cpuid, cpu_online_map); + set_cpu_online(boot_cpuid, true); cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid)); cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid)); #ifdef CONFIG_PPC64 @@ -307,7 +307,7 @@ int generic_cpu_disable(void) if (cpu == boot_cpuid) return -EBUSY; - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); #ifdef CONFIG_PPC64 vdso_data->processorCount--; fixup_irqs(cpu_online_map); @@ -361,7 +361,7 @@ void generic_mach_cpu_die(void) smp_wmb(); while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); - cpu_set(cpu, cpu_online_map); + set_cpu_online(cpu, true); local_irq_enable(); } #endif @@ -508,7 +508,7 @@ int __devinit start_secondary(void *unused) ipi_call_lock(); notify_cpu_starting(cpu); - cpu_set(cpu, cpu_online_map); + set_cpu_online(cpu, true); /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); for (i = 0; i < threads_per_core; i++) { diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 1cc5e9e5da9..b97c2d67f4a 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -22,7 +22,6 @@ #include <linux/signal.h> #include <linux/resource.h> #include <linux/times.h> -#include <linux/utsname.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/sem.h> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index a0abce251d0..3faaf29bdb2 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -1,3 +1,4 @@ + /* * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. * <benh@kernel.crashing.org> @@ -74,7 +75,7 @@ static int vdso_ready; static union { struct vdso_data data; u8 page[PAGE_SIZE]; -} vdso_data_store __attribute__((__section__(".data.page_aligned"))); +} vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = &vdso_data_store.data; /* Format of the patch table */ diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index b54b8168813..51ead52141b 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -16,7 +16,7 @@ GCOV_PROFILE := n EXTRA_CFLAGS := -shared -fno-common -fno-builtin EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) EXTRA_AFLAGS := -D__VDSO32__ -s obj-y += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S index 556f0caa5d8..6e8f507ed32 100644 --- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S @@ -1,7 +1,8 @@ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/page.h> - .section ".data.page_aligned" + __PAGE_ALIGNED_DATA .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index dd0c8e93677..79da65d44a2 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -11,7 +11,7 @@ GCOV_PROFILE := n EXTRA_CFLAGS := -shared -fno-common -fno-builtin EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) EXTRA_AFLAGS := -D__VDSO64__ -s obj-y += vdso64_wrapper.o diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S index 0529cb9e3b9..b8553d62b79 100644 --- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S @@ -1,7 +1,8 @@ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/page.h> - .section ".data.page_aligned" + __PAGE_ALIGNED_DATA .globl vdso64_start, vdso64_end .balign PAGE_SIZE diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 937a38e7317..b40c22d697f 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -320,7 +320,7 @@ static int __init smp_psurge_probe(void) if (ncpus > NR_CPUS) ncpus = NR_CPUS; for (i = 1; i < ncpus ; ++i) - cpu_set(i, cpu_present_map); + set_cpu_present(i, true); if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352); @@ -867,7 +867,7 @@ static void __devinit smp_core99_setup_cpu(int cpu_nr) int smp_core99_cpu_disable(void) { - cpu_clear(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), false); /* XXX reset cpu affinity here */ mpic_cpu_set_priority(0xf); @@ -952,7 +952,7 @@ void __init pmac_setup_smp(void) int cpu; for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu) - cpu_set(cpu, cpu_possible_map); + set_cpu_possible(cpu, true); smp_ops = &psurge_smp_ops; } #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a20ead87153..ebff6d9a4e3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -94,7 +94,7 @@ static int pseries_cpu_disable(void) { int cpu = smp_processor_id(); - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); vdso_data->processorCount--; /*fix boot_cpuid here*/ @@ -185,7 +185,7 @@ static int pseries_add_processor(struct device_node *np) for_each_cpu_mask(cpu, tmp) { BUG_ON(cpu_isset(cpu, cpu_present_map)); - cpu_set(cpu, cpu_present_map); + set_cpu_present(cpu, true); set_hard_smp_processor_id(cpu, *intserv++); } err = 0; @@ -217,7 +217,7 @@ static void pseries_remove_processor(struct device_node *np) if (get_hard_smp_processor_id(cpu) != intserv[i]) continue; BUG_ON(cpu_online(cpu)); - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); set_hard_smp_processor_id(cpu, -1); break; } diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 264528e4f58..b55fd7ed1c3 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev; * /proc entries (sysctl) */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; -static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +static int appldata_timer_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); static int appldata_interval_handler(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd) * Start/Stop timer, show status of timer (0 = not active, 1 = active) */ static int -appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, +appldata_timer_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int len; @@ -289,7 +288,7 @@ out: * current timer interval. */ static int -appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, +appldata_interval_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int len, interval; @@ -335,7 +334,7 @@ out: * monitoring (0 = not in process, 1 = in process) */ static int -appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, +appldata_generic_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index d4026f62cb0..aed3069699b 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install - same as make zlilo diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c991fe6473c..a868b272c25 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -62,7 +62,7 @@ extern struct mutex smp_cpu_state_mutex; extern int smp_cpu_polarization[]; extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); #endif diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 5e0ad618dc4..6e7211abd95 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -9,7 +9,6 @@ const struct cpumask *cpu_coregroup_mask(unsigned int cpu); extern cpumask_t cpu_core_map[NR_CPUS]; -#define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) int topology_set_cpu_management(int fc); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 5519cb74510..0debcec23a3 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -24,7 +24,6 @@ #include <linux/signal.h> #include <linux/resource.h> #include <linux/times.h> -#include <linux/utsname.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/sem.h> diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 4c512561687..20f282c911c 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -881,11 +881,11 @@ static int debug_active=1; * if debug_active is already off */ static int -s390dbf_procactive(ctl_table *table, int write, struct file *filp, +s390dbf_procactive(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); else return 0; } diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index fe787f9e5f3..4d1c9fb0b54 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -25,9 +25,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 59fe6ecc6ed..5417eb57271 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -27,7 +27,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/notifier.h> -#include <linux/utsname.h> #include <linux/tick.h> #include <linux/elfcore.h> #include <linux/kernel_stat.h> diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b4b6396e6cf..c932caa5e85 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -147,11 +147,11 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig) udelay(10); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { int cpu; - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, mask) smp_ext_bitcall(cpu, ec_call_function); } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 45e1708b70f..45a3e9a7ae2 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -75,7 +75,7 @@ __setup("vdso=", vdso_setup); static union { struct vdso_data data; u8 page[PAGE_SIZE]; -} vdso_data_store __attribute__((__section__(".data.page_aligned"))); +} vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = &vdso_data_store.data; /* diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index ca78ad60ba2..d13e8755a8c 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -13,7 +13,7 @@ KBUILD_AFLAGS_31 += -m31 -s KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S index 61639a89e70..ae42f8ce350 100644 --- a/arch/s390/kernel/vdso32/vdso32_wrapper.S +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -1,7 +1,8 @@ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/page.h> - .section ".data.page_aligned" + __PAGE_ALIGNED_DATA .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 6fc8e829258..449352dda9c 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -13,7 +13,7 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S index d8e2ac14d56..c245842b516 100644 --- a/arch/s390/kernel/vdso64/vdso64_wrapper.S +++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S @@ -1,7 +1,8 @@ #include <linux/init.h> +#include <linux/linkage.h> #include <asm/page.h> - .section ".data.page_aligned" + __PAGE_ALIGNED_DATA .globl vdso64_start, vdso64_end .balign PAGE_SIZE diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 413c240cbca..b201135cc18 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp) static struct ctl_table cmm_table[]; static int -cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, +cmm_pages_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[16], *p; @@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, } static int -cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, +cmm_timeout_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; diff --git a/arch/score/kernel/init_task.c b/arch/score/kernel/init_task.c index ff952f6c63f..baa03ee217d 100644 --- a/arch/score/kernel/init_task.c +++ b/arch/score/kernel/init_task.c @@ -34,9 +34,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"), __aligned__(THREAD_SIZE))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/sh/boot/compressed/install.sh b/arch/sh/boot/compressed/install.sh index 90589f0fec1..f9f41818b17 100644 --- a/arch/sh/boot/compressed/install.sh +++ b/arch/sh/boot/compressed/install.sh @@ -23,8 +23,8 @@ # User may have a custom install script -if [ -x /sbin/installkernel ]; then - exec /sbin/installkernel "$@" +if [ -x /sbin/${INSTALLKERNEL} ]; then + exec /sbin/${INSTALLKERNEL} "$@" fi if [ "$2" = "zImage" ]; then diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig index b91fa8dbf04..4d58eb0973d 100644 --- a/arch/sh/drivers/dma/Kconfig +++ b/arch/sh/drivers/dma/Kconfig @@ -1,12 +1,9 @@ menu "DMA support" -config SH_DMA_API - bool config SH_DMA bool "SuperH on-chip DMA controller (DMAC) support" depends on CPU_SH3 || CPU_SH4 - select SH_DMA_API default n config SH_DMA_IRQ_MULTI @@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ CPU_SUBTYPE_SH7760 +config SH_DMA_API + depends on SH_DMA + bool "SuperH DMA API support" + default n + help + SH_DMA_API always enabled DMA API of used SuperH. + If you want to use DMA ENGINE, you must not enable this. + Please enable DMA_ENGINE and SH_DMAE. + config NR_ONCHIP_DMA_CHANNELS int depends on SH_DMA diff --git a/arch/sh/drivers/dma/Makefile b/arch/sh/drivers/dma/Makefile index c6068137b46..d88c9484762 100644 --- a/arch/sh/drivers/dma/Makefile +++ b/arch/sh/drivers/dma/Makefile @@ -2,8 +2,7 @@ # Makefile for the SuperH DMA specific kernel interface routines under Linux. # -obj-$(CONFIG_SH_DMA_API) += dma-api.o dma-sysfs.o -obj-$(CONFIG_SH_DMA) += dma-sh.o +obj-$(CONFIG_SH_DMA_API) += dma-sh.o dma-api.o dma-sysfs.o obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o obj-$(CONFIG_G2_DMA) += dma-g2.o obj-$(CONFIG_SH_DMABRG) += dmabrg.o diff --git a/arch/sh/include/asm/dma-sh.h b/arch/sh/include/asm/dma-sh.h index 68a5f4cb034..78eed3e0bdf 100644 --- a/arch/sh/include/asm/dma-sh.h +++ b/arch/sh/include/asm/dma-sh.h @@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = { #define CHCR 0x0C #define DMAOR 0x40 +/* + * for dma engine + * + * SuperH DMA mode + */ +#define SHDMA_MIX_IRQ (1 << 1) +#define SHDMA_DMAOR1 (1 << 2) +#define SHDMA_DMAE1 (1 << 3) + +struct sh_dmae_pdata { + unsigned int mode; +}; + #endif /* __DMA_SH_H */ diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index ca64f43abe6..53ef26ced75 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -44,7 +44,6 @@ void plat_send_ipi(unsigned int cpu, unsigned int message); void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask #else diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index f8c40cc6505..65e7bd2f224 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -31,7 +31,6 @@ #define cpu_to_node(cpu) ((void)(cpu),0) #define parent_node(node) ((void)(node),0) -#define node_to_cpumask(node) ((void)node, cpu_online_map) #define cpumask_of_node(node) ((void)node, cpu_online_mask) #define pcibus_to_node(bus) ((void)(bus), -1) diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c index 1719957c0a6..11f2ea556a6 100644 --- a/arch/sh/kernel/init_task.c +++ b/arch/sh/kernel/init_task.c @@ -17,9 +17,8 @@ struct pt_regs fake_swapper_regs; * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 60f8af4497c..7cb933ba495 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -165,11 +165,9 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs) } #ifdef CONFIG_IRQSTACKS -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); +static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); +static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; /* * allocate per-cpu stacks for hardirq and for softirq processing diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index 63ba12836ea..eb68bfdd86e 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -9,7 +9,6 @@ #include <linux/syscalls.h> #include <linux/mman.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/ipc.h> diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 91fb8445a5a..287235768bc 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -23,7 +23,6 @@ #include <linux/stat.h> #include <linux/mman.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/syscalls.h> #include <linux/ipc.h> #include <asm/uaccess.h> diff --git a/arch/sh/kernel/vsyscall/Makefile b/arch/sh/kernel/vsyscall/Makefile index 4bbce1cfa35..8f0ea5fc835 100644 --- a/arch/sh/kernel/vsyscall/Makefile +++ b/arch/sh/kernel/vsyscall/Makefile @@ -15,7 +15,7 @@ quiet_cmd_syscall = SYSCALL $@ export CPPFLAGS_vsyscall.lds += -P -C -Ush vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) + $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) SYSCFLAGS_vsyscall-trapa.so = $(vsyscall-flags) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 467221dd570..dfe272d1446 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -31,7 +31,6 @@ export BITS := 32 #KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7 KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7 KBUILD_AFLAGS += -m32 -CPPFLAGS_vmlinux.lds += -m32 #LDFLAGS_vmlinux = -N -Ttext 0xf0004000 # Since 2.5.40, the first stage is left not btfix-ed. @@ -45,9 +44,6 @@ else CHECKFLAGS += -D__sparc__ -D__sparc_v9__ -D__arch64__ -m64 -# Undefine sparc when processing vmlinux.lds - it is used -# And teach CPP we are doing 64 bit builds (for this case) -CPPFLAGS_vmlinux.lds += -m64 -Usparc LDFLAGS := -m elf64_sparc export BITS := 64 diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index becb6bf353a..f49e11cd4de 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -36,7 +36,6 @@ extern int sparc64_multi_core; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask /* * General functions that each host system must provide. diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 26cd25c0839..600a79035fa 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -12,22 +12,8 @@ static inline int cpu_to_node(int cpu) #define parent_node(node) (node) -static inline cpumask_t node_to_cpumask(int node) -{ - return numa_cpumask_lookup_table[node]; -} #define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) -/* - * Returns a pointer to the cpumask of CPUs on Node 'node'. - * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" - */ -#define node_to_cpumask_ptr(v, node) \ - cpumask_t *v = &(numa_cpumask_lookup_table[node]) - -#define node_to_cpumask_ptr_next(v, node) \ - v = &(numa_cpumask_lookup_table[node]) - struct pci_bus; #ifdef CONFIG_PCI extern int pcibus_to_node(struct pci_bus *pbus); @@ -71,8 +57,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #ifdef CONFIG_SMP #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) -#define topology_core_siblings(cpu) (cpu_core_map[cpu]) -#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #define mc_capable() (sparc64_multi_core) diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 3a048fad7ee..5b47fab9966 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -7,7 +7,11 @@ ccflags-y := -Werror extra-y := head_$(BITS).o extra-y += init_task.o -extra-y += vmlinux.lds + +# Undefine sparc when processing vmlinux.lds - it is used +# And teach CPP we are doing $(BITS) builds (for this case) +CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS) +extra-y += vmlinux.lds obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o obj-$(CONFIG_SPARC32) += etrap_32.o diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c index 28125c5b3d3..5fe3d65581f 100644 --- a/arch/sparc/kernel/init_task.c +++ b/arch/sparc/kernel/init_task.c @@ -18,6 +18,5 @@ EXPORT_SYMBOL(init_task); * If this is not aligned on a 8k boundry, then you should change code * in etrap.S which assumes it. */ -union thread_union init_thread_union - __attribute__((section (".data.init_task"))) - = { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f5000a460c0..04e28b2671c 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -16,7 +16,6 @@ #include <linux/signal.h> #include <linux/resource.h> #include <linux/times.h> -#include <linux/utsname.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/sem.h> diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h index 15c2d752b2b..a63c5d2d984 100644 --- a/arch/sparc/kernel/systbls.h +++ b/arch/sparc/kernel/systbls.h @@ -3,10 +3,11 @@ #include <linux/kernel.h> #include <linux/types.h> -#include <linux/utsname.h> #include <asm/utrap.h> #include <asm/signal.h> +struct new_utsname; + extern asmlinkage unsigned long sys_getpagesize(void); extern asmlinkage unsigned long sparc_brk(unsigned long brk); extern asmlinkage long sparc_pipe(struct pt_regs *regs); diff --git a/arch/um/Makefile b/arch/um/Makefile index 0728def3223..fc633dbacf8 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -96,11 +96,10 @@ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) $(call cc-option, -fno-stack-protector,) \ $(call cc-option, -fno-stack-protector-all,) -CONFIG_KERNEL_STACK_ORDER ?= 2 -STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] ) - -CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \ - -DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE) +# Options used by linker script +export LDS_START := $(START) +export LDS_ELF_ARCH := $(ELF_ARCH) +export LDS_ELF_FORMAT := $(ELF_FORMAT) # The wrappers will select whether using "malloc" or the kernel allocator. LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 54f42e8b010..34d813011b7 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, unsigned cpu = smp_processor_id(); if(prev != next){ - cpu_clear(cpu, prev->cpu_vm_mask); - cpu_set(cpu, next->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); if(next != &init_mm) __switch_mm(&next->context.id); } diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 388ec0a3ea9..1119233597a 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -3,6 +3,9 @@ # Licensed under the GPL # +CPPFLAGS_vmlinux.lds := -U$(SUBARCH) -DSTART=$(LDS_START) \ + -DELF_ARCH=$(LDS_ELF_ARCH) \ + -DELF_FORMAT=$(LDS_ELF_FORMAT) extra-y := vmlinux.lds clean-files := diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c index b25121b537d..8aa77b61a5f 100644 --- a/arch/um/kernel/init_task.c +++ b/arch/um/kernel/init_task.c @@ -30,9 +30,8 @@ EXPORT_SYMBOL(init_task); * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; union thread_union cpu0_irqstack __attribute__((__section__(".data.init_irqstack"))) = diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 98351c78bc8..106bf27e2a9 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -111,7 +111,7 @@ void smp_prepare_cpus(unsigned int maxcpus) int i; for (i = 0; i < ncpus; ++i) - cpu_set(i, cpu_possible_map); + set_cpu_possible(i, true); cpu_clear(me, cpu_online_map); cpu_set(me, cpu_online_map); diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S index f8aeb448aab..16e49bfa2b4 100644 --- a/arch/um/kernel/vmlinux.lds.S +++ b/arch/um/kernel/vmlinux.lds.S @@ -1,3 +1,6 @@ + +KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER); + #ifdef CONFIG_LD_SCRIPT_STATIC #include "uml.lds.S" #else diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 7983c420eaf..a012ee8ef80 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -179,8 +179,8 @@ archclean: define archhelp echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' echo ' install - Install kernel using' - echo ' (your) ~/bin/installkernel or' - echo ' (distribution) /sbin/installkernel or' + echo ' (your) ~/bin/$(INSTALLKERNEL) or' + echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH) and run lilo' echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh index 8d60ee15dfd..d13ec1c3864 100644 --- a/arch/x86/boot/install.sh +++ b/arch/x86/boot/install.sh @@ -33,8 +33,8 @@ verify "$3" # User may have a custom install script -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi # Default install - same as make zlilo diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 5d367caa0e3..549860d3be8 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_CACHE_H #define _ASM_X86_CACHE_H +#include <linux/linkage.h> + /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) @@ -13,7 +15,7 @@ #ifdef CONFIG_SMP #define __cacheline_aligned_in_smp \ __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ - __attribute__((__section__(".data.page_aligned"))) + __page_aligned_data #endif #endif diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index f923203dc39..4a2d4e0c18d 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (likely(prev != next)) { /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); + cpumask_clear_cpu(cpu, mm_cpumask(prev)); #ifdef CONFIG_SMP percpu_write(cpu_tlbstate.state, TLBSTATE_OK); percpu_write(cpu_tlbstate.active_mm, next); #endif - cpu_set(cpu, next->cpu_vm_mask); + cpumask_set_cpu(cpu, mm_cpumask(next)); /* Re-load page tables */ load_cr3(next->pgd); @@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, percpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index e63cf7d441e..139d4c1a33a 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog; #define NMI_INVALID 3 struct ctl_table; -struct file; -extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, +extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index f76a162c082..ada8c201d51 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus) static inline const struct cpumask * cpumask_of_pcibus(const struct pci_bus *bus) { - return cpumask_of_node(__pcibus_to_node(bus)); + int node; + + node = __pcibus_to_node(bus); + return (node == -1) ? cpu_online_mask : + cpumask_of_node(node); } #endif diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 6a84ed166ae..1e796782cd7 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu) smp_ops.send_call_func_single_ipi(cpu); } -#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { smp_ops.send_call_func_ipi(mask); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 64970b9885f..dc69f28489f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -227,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node) cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { - if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { + if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { kfree(cfg); cfg = NULL; - } else if (!alloc_cpumask_var_node(&cfg->old_domain, + } else if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_ATOMIC, node)) { free_cpumask_var(cfg->domain); kfree(cfg); cfg = NULL; - } else { - cpumask_clear(cfg->domain); - cpumask_clear(cfg->old_domain); } } diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index cb66a22d98a..7ff61d6a188 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) /* * proc handler for /proc/sys/kernel/nmi */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, +int proc_nmi_enabled(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { int old_state; nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (!!old_state == !!nmi_watchdog_enabled) return 0; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bca5fba91c9..f7dd2a7c3bf 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -5,7 +5,6 @@ #include <linux/kallsyms.h> #include <linux/kprobes.h> #include <linux/uaccess.h> -#include <linux/utsname.h> #include <linux/hardirq.h> #include <linux/kdebug.h> #include <linux/module.h> diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 54b0a327676..a071e6be177 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -5,7 +5,6 @@ #include <linux/kallsyms.h> #include <linux/kprobes.h> #include <linux/uaccess.h> -#include <linux/utsname.h> #include <linux/hardirq.h> #include <linux/kdebug.h> #include <linux/module.h> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b766e8c7252..218aad7ee76 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -608,7 +608,7 @@ ENTRY(initial_code) /* * BSS section */ -.section ".bss.page_aligned","wa" +__PAGE_ALIGNED_BSS .align PAGE_SIZE_asm #ifdef CONFIG_X86_PAE swapper_pg_pmd: @@ -626,7 +626,7 @@ ENTRY(empty_zero_page) * This starts the data section. */ #ifdef CONFIG_X86_PAE -.section ".data.page_aligned","wa" +__PAGE_ALIGNED_DATA /* Page-aligned for the benefit of paravirt? */ .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index fa54f78e2a0..d0bc0a13a43 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -418,7 +418,7 @@ ENTRY(phys_base) ENTRY(idt_table) .skip IDT_ENTRIES * 16 - .section .bss.page_aligned, "aw", @nobits + __PAGE_ALIGNED_BSS .align PAGE_SIZE ENTRY(empty_zero_page) .skip PAGE_SIZE diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 270ff83efc1..3a54dcb9cd0 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; /* * Initial task structure. diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 71f1d99a635..ec6ef60cbd1 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #ifdef CONFIG_SMP preempt_disable(); load_LDT(pc); - if (!cpus_equal(current->mm->cpu_vm_mask, - cpumask_of_cpu(smp_processor_id()))) + if (!cpumask_equal(mm_cpumask(current->mm), + cpumask_of(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 847ab416031..5284cd2b577 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -555,10 +555,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) void __init init_c1e_mask(void) { /* If we're using c1e_idle, we need to allocate c1e_mask. */ - if (pm_idle == c1e_idle) { - alloc_cpumask_var(&c1e_mask, GFP_KERNEL); - cpumask_clear(c1e_mask); - } + if (pm_idle == c1e_idle) + zalloc_cpumask_var(&c1e_mask, GFP_KERNEL); } static int __init idle_setup(char *str) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 09c5e077dff..565ebc65920 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1059,12 +1059,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) #endif current_thread_info()->cpu = 0; /* needed? */ for_each_possible_cpu(i) { - alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); - alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); - alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); - cpumask_clear(per_cpu(cpu_core_map, i)); - cpumask_clear(per_cpu(cpu_sibling_map, i)); - cpumask_clear(cpu_data(i).llc_shared_map); + zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); } set_cpu_sibling_map(0); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e293ac56c72..dcb00d27851 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -93,7 +93,6 @@ static struct irqaction irq0 = { void __init setup_default_timer_irq(void) { - irq0.mask = cpumask_of_cpu(0); setup_irq(0, &irq0); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9346e102338..a665c71352b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -14,7 +14,6 @@ #include <linux/spinlock.h> #include <linux/kprobes.h> #include <linux/uaccess.h> -#include <linux/utsname.h> #include <linux/kdebug.h> #include <linux/kernel.h> #include <linux/module.h> diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index cf53a78e2dc..8cb4974ff59 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void) } #ifdef CONFIG_SYSCTL - -static int -vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); -} - static ctl_table kernel_table2[] = { { .procname = "vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = vsyscall_sysctl_change }, + .proc_handler = proc_dointvec }, {} }; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 82728f2c6d5..f4cee9028cf 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, info.si_errno = 0; info.si_code = si_code; info.si_addr = (void __user *)address; + info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; force_sig_info(si_signo, &info, tsk); } @@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code, } static void -do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) +do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, + unsigned int fault) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + int code = BUS_ADRERR; up_read(&mm->mmap_sem); @@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +#ifdef CONFIG_MEMORY_FAILURE + if (fault & VM_FAULT_HWPOISON) { + printk(KERN_ERR + "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + tsk->comm, tsk->pid, address); + code = BUS_MCEERR_AR; + } +#endif + force_sig_info_fault(SIGBUS, code, address, tsk); } static noinline void @@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & VM_FAULT_OOM) { out_of_memory(regs, error_code, address); } else { - if (fault & VM_FAULT_SIGBUS) - do_sigbus(regs, error_code, address); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) + do_sigbus(regs, error_code, address, fault); else BUG(); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 24952fdc7e4..dd38bfbefd1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size) mb(); } +EXPORT_SYMBOL_GPL(clflush_cache_range); static void __cpa_flush_all(void *arg) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index c814e144a3f..36fe08eeb5c 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -59,7 +59,8 @@ void leave_mm(int cpu) { if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); + cpumask_clear_cpu(cpu, + mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -234,8 +235,8 @@ void flush_tlb_current_task(void) preempt_disable(); local_flush_tlb(); - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, va); + if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(mm_cpumask(mm), mm, va); preempt_enable(); } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 5db96d4304d..1331fcf2614 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum) #else /* CONFIG_X86_32 */ -static unsigned char mp_bus_to_node[BUS_NR] = { +static int mp_bus_to_node[BUS_NR] = { [0 ... BUS_NR - 1] = -1 }; diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 88112b49f02..6b4ffedb93c 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) -VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) GCOV_PROFILE := n # diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 093dd59b538..3bf7b1d250c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm) /* Get the "official" set of cpus referring to our pagetable. */ if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) + if (!cpumask_test_cpu(cpu, mm_cpumask(mm)) && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) continue; smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); } return; } - cpumask_copy(mask, &mm->cpu_vm_mask); + cpumask_copy(mask, mm_cpumask(mm)); /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index fe3186de6a3..6f56d95f2c1 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -27,7 +27,8 @@ sed-y = -e 's/(\(\.[a-z]*it\|\.ref\|\)\.text)/(\1.literal \1.text)/g' \ -e 's/(\(\.text\.[a-z]*\))/(\1.literal \1)/g' quiet_cmd__cpp_lds_S = LDS $@ - cmd__cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ $< | sed $(sed-y) >$@ + cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $< \ + | sed $(sed-y) >$@ $(obj)/vmlinux.lds: $(src)/vmlinux.lds.S FORCE $(call if_changed_dep,_cpp_lds_S) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index d9ddc1ba761..d215adcfd4e 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -235,7 +235,7 @@ should_never_return: * BSS section */ -.section ".bss.page_aligned", "w" +__PAGE_ALIGNED_BSS #ifdef CONFIG_MMU ENTRY(swapper_pg_dir) .fill PAGE_SIZE, 1, 0 diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c index c4302f0e4ba..cd122fb7e48 100644 --- a/arch/xtensa/kernel/init_task.c +++ b/arch/xtensa/kernel/init_task.c @@ -23,9 +23,8 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = -{ INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data = + { INIT_THREAD_INFO(init_task) }; struct task_struct init_task = INIT_TASK(init_task); diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb3914598..e5aeb2b79e6 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,12 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQ + tristate + select ASYNC_CORE + +config ASYNC_RAID6_RECOV + tristate + select ASYNC_CORE + select ASYNC_PQ + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fb..d1e0e6f72bc 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQ) += async_pq.o +obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o +obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c41..0ec1fb69d4e 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -33,28 +33,31 @@ * async_memcpy - attempt to copy memory with a dma engine. * @dest: destination page * @src: src page - * @offset: offset in pages to start transaction + * @dest_offset: offset into 'dest' to start transaction + * @src_offset: offset into 'src' to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, - * @depend_tx: memcpy depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags = 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); @@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; } EXPORT_SYMBOL_GPL(async_memcpy); -static int __init async_memcpy_init(void) -{ - return 0; -} - -static void __exit async_memcpy_exit(void) -{ - do { } while (0); -} - -module_init(async_memcpy_init); -module_exit(async_memcpy_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memcpy api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb24..58e4a8752ae 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,26 +35,26 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: memset depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_memset(struct page *dest, int val, unsigned int offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, &dest, 1, NULL, 0, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - if (device) { + if (device && is_dma_fill_aligned(device, offset, 0, len)) { dma_addr_t dma_dest; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags = 0; + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); @@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { /* run the memset synchronously */ void *dest_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); - dest_buf = (void *) (((char *) page_address(dest)) + offset); + dest_buf = page_address(dest) + offset; /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; } EXPORT_SYMBOL_GPL(async_memset); -static int __init async_memset_init(void) -{ - return 0; -} - -static void __exit async_memset_exit(void) -{ - do { } while (0); -} - -module_init(async_memset_init); -module_exit(async_memset_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous memset api"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 00000000000..b88db6d1dc6 --- /dev/null +++ b/crypto/async_tx/async_pq.c @@ -0,0 +1,395 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com> + * Copyright(c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/raid/pq.h> +#include <linux/async_tx.h> + +/** + * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + */ +static struct page *scribble; + +static bool is_raid6_zero_block(struct page *p) +{ + return p == (void *) raid6_empty_zero_page; +} + +/* the struct page *blocks[] parameter passed to async_gen_syndrome() + * and async_syndrome_val() contains the 'P' destination address at + * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] + * + * note: these are macros as they are used as lvalues + */ +#define P(b, d) (b[d-2]) +#define Q(b, d) (b[d-1]) + +/** + * do_async_gen_syndrome - asynchronously calculate P and/or Q + */ +static __async_inline struct dma_async_tx_descriptor * +do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, + const unsigned char *scfs, unsigned int offset, int disks, + size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct dma_device *dma = chan->device; + enum dma_ctrl_flags dma_flags = 0; + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + dma_async_tx_callback cb_param_orig = submit->cb_param; + int src_cnt = disks - 2; + unsigned char coefs[src_cnt]; + unsigned short pq_src_cnt; + dma_addr_t dma_dest[2]; + int src_off = 0; + int idx; + int i; + + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ + if (P(blocks, disks)) + dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (Q(blocks, disks)) + dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + for (i = 0, idx = 0; i < src_cnt; i++) { + if (is_raid6_zero_block(blocks[i])) + continue; + dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, + DMA_TO_DEVICE); + coefs[idx] = scfs[i]; + idx++; + } + src_cnt = idx; + + while (src_cnt > 0) { + submit->flags = flags_orig; + pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); + /* if we are submitting additional pqs, leave the chain open, + * clear the callback parameters, and leave the destination + * buffers mapped + */ + if (src_cnt > pq_src_cnt) { + submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = NULL; + submit->cb_param = NULL; + } else { + dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + if (cb_fn_orig) + dma_flags |= DMA_PREP_INTERRUPT; + } + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. Drivers force forward + * progress in case they can not provide a descriptor + */ + for (;;) { + tx = dma->device_prep_dma_pq(chan, dma_dest, + &dma_src[src_off], + pq_src_cnt, + &coefs[src_off], len, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; + + /* drop completed sources */ + src_cnt -= pq_src_cnt; + src_off += pq_src_cnt; + + dma_flags |= DMA_PREP_CONTINUE; + } + + return tx; +} + +/** + * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome + */ +static void +do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + void **srcs; + int i; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) blocks; + + for (i = 0; i < disks; i++) { + if (is_raid6_zero_block(blocks[i])) { + BUG_ON(i > disks - 3); /* P or Q can't be zero */ + srcs[i] = blocks[i]; + } else + srcs[i] = page_address(blocks[i]) + offset; + } + raid6_call.gen_syndrome(disks, len, srcs); + async_tx_sync_epilog(submit); +} + +/** + * async_gen_syndrome - asynchronously calculate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @submit: submission/completion modifiers + * + * General note: This routine assumes a field of GF(2^8) with a + * primitive polynomial of 0x11d and a generator of {02}. + * + * 'disks' note: callers can optionally omit either P or Q (but not + * both) from the calculation by setting blocks[disks-2] or + * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= + * PAGE_SIZE as a temporary buffer of this size is used in the + * synchronous path. 'disks' always accounts for both destination + * buffers. + * + * 'blocks' note: if submit->scribble is NULL then the contents of + * 'blocks' may be overridden + */ +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + int src_cnt = disks - 2; + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &P(blocks, disks), 2, + blocks, src_cnt, len); + struct dma_device *device = chan ? chan->device : NULL; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && + (src_cnt <= dma_maxpq(device, 0) || + dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && + is_dma_pq_aligned(device, offset, 0, len)) { + /* run the p+q asynchronously */ + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, + disks, len, dma_src, submit); + } + + /* run the pq synchronously */ + pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + if (!P(blocks, disks)) { + P(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + if (!Q(blocks, disks)) { + Q(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + do_sync_gen_syndrome(blocks, offset, disks, len, submit); + + return NULL; +} +EXPORT_SYMBOL_GPL(async_gen_syndrome); + +/** + * async_syndrome_val - asynchronously validate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set + * @spare: temporary result buffer for the synchronous case + * @submit: submission / completion modifiers + * + * The same notes from async_gen_syndrome apply to the 'blocks', + * and 'disks' parameters of this routine. The synchronous path + * requires a temporary result buffer and submit->scribble to be + * specified. + */ +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int disks, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, + NULL, 0, blocks, disks, + len); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks < 4); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && disks <= dma_maxpq(device, 0) && + is_dma_pq_aligned(device, offset, 0, len)) { + struct device *dev = device->dev; + dma_addr_t *pq = &dma_src[disks-2]; + int i; + + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + if (!P(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (!Q(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + for (i = 0; i < disks; i++) + if (likely(blocks[i])) { + BUG_ON(is_raid6_zero_block(blocks[i])); + dma_src[i] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + } + + for (;;) { + tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + disks - 2, + raid6_gfexp, + len, pqres, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + async_tx_submit(chan, tx, submit); + + return tx; + } else { + struct page *p_src = P(blocks, disks); + struct page *q_src = Q(blocks, disks); + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *scribble = submit->scribble; + void *cb_param_orig = submit->cb_param; + void *p, *q, *s; + + pr_debug("%s: (sync) disks: %d len: %zu\n", + __func__, disks, len); + + /* caller must provide a temporary result buffer and + * allow the input parameters to be preserved + */ + BUG_ON(!spare || !scribble); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + /* recompute p and/or q into the temporary buffer and then + * check to see the result matches the current value + */ + tx = NULL; + *pqres = 0; + if (p_src) { + init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, scribble); + tx = async_xor(spare, blocks, offset, disks-2, len, submit); + async_tx_quiesce(&tx); + p = page_address(p_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; + } + + if (q_src) { + P(blocks, disks) = NULL; + Q(blocks, disks) = spare; + init_async_submit(submit, 0, NULL, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, offset, disks, len, submit); + async_tx_quiesce(&tx); + q = page_address(q_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; + } + + /* restore P, Q and submit */ + P(blocks, disks) = p_src; + Q(blocks, disks) = q_src; + + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + submit->flags = flags_orig; + async_tx_sync_epilog(submit); + + return NULL; + } +} +EXPORT_SYMBOL_GPL(async_syndrome_val); + +static int __init async_pq_init(void) +{ + scribble = alloc_page(GFP_KERNEL); + + if (scribble) + return 0; + + pr_err("%s: failed to allocate required spare page\n", __func__); + + return -ENOMEM; +} + +static void __exit async_pq_exit(void) +{ + put_page(scribble); +} + +module_init(async_pq_init); +module_exit(async_pq_exit); + +MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 00000000000..6d73dde4786 --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c @@ -0,0 +1,468 @@ +/* + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * Copyright(c) 2009 Intel Corporation + * + * based on raid6recov.c: + * Copyright 2002 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/raid/pq.h> +#include <linux/async_tx.h> + +static struct dma_async_tx_descriptor * +async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, srcs, 2, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *amul, *bmul; + u8 ax, bx; + u8 *a, *b, *c; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[2]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); + } + + /* run the operation synchronously */ + async_tx_quiesce(&submit->depend_tx); + amul = raid6_gfmul[coef[0]]; + bmul = raid6_gfmul[coef[1]]; + a = page_address(srcs[0]); + b = page_address(srcs[1]); + c = page_address(dest); + + while (len--) { + ax = amul[*a++]; + bx = bmul[*b++]; + *c++ = ax ^ bx; + } + + return NULL; +} + +static struct dma_async_tx_descriptor * +async_mult(struct page *dest, struct page *src, u8 coef, size_t len, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, &src, 1, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *qmul; /* Q multiplier table */ + u8 *d, *s; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[1]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + + /* could not get a descriptor, unmap and fall through to + * the synchronous path + */ + dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + } + + /* no channel available, or failed to allocate a descriptor, so + * perform the operation synchronously + */ + async_tx_quiesce(&submit->depend_tx); + qmul = raid6_gfmul[coef]; + d = page_address(dest); + s = page_address(src); + + while (len--) + *d++ = qmul[*s++]; + + return NULL; +} + +static struct dma_async_tx_descriptor * +__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *a, *b; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[4-2]; + q = blocks[4-1]; + + a = blocks[faila]; + b = blocks[failb]; + + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = p; + srcs[1] = q; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_sum_product(b, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = p; + srcs[1] = b; + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(a, srcs, 0, 2, bytes, submit); + + return tx; + +} + +static struct dma_async_tx_descriptor * +__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *g, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + int uninitialized_var(good); + int i; + + for (i = 0; i < 3; i++) { + if (i == faila || i == failb) + continue; + else { + good = i; + break; + } + } + BUG_ON(i >= 3); + + p = blocks[5-2]; + q = blocks[5-1]; + g = blocks[good]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for delta p and + * delta q + */ + dp = blocks[faila]; + dq = blocks[failb]; + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_memcpy(dp, g, 0, 0, bytes, submit); + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +static struct dma_async_tx_descriptor * +__2data_recov_n(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-2] = dp; + dq = blocks[failb]; + blocks[failb] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + + /* Restore pointer table */ + blocks[faila] = dp; + blocks[failb] = dq; + blocks[disks-2] = p; + blocks[disks-1] = q; + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +/** + * async_raid6_2data_recov - asynchronously calculate two missing data blocks + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: first failed drive index + * @failb: second failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!submit->scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + switch (disks) { + case 4: + /* dma devices do not uniformly understand a zero source pq + * operation (in contrast to the synchronous case), so + * explicitly handle the 4 disk special case + */ + return __2data_recov_4(bytes, faila, failb, blocks, submit); + case 5: + /* dma devices do not uniformly understand a single + * source pq operation (in contrast to the synchronous + * case), so explicitly handle the 5 disk special case + */ + return __2data_recov_5(bytes, faila, failb, blocks, submit); + default: + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + } +} +EXPORT_SYMBOL_GPL(async_raid6_2data_recov); + +/** + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int disks, size_t bytes, int faila, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dq; + u8 coef; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + struct page *srcs[2]; + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_datap_recov(disks, bytes, faila, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + /* in the 4 disk case we only need to perform a single source + * multiplication + */ + if (disks == 4) { + int good = faila == 0 ? 1 : 0; + struct page *g = blocks[good]; + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); + tx = async_memcpy(p, g, 0, 0, bytes, submit); + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + } else { + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, + scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + } + + /* Restore pointer table */ + blocks[faila] = dq; + blocks[disks-1] = q; + + /* calculate g^{-faila} */ + coef = raid6_gfinv[raid6_gfexp[faila]]; + + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + NULL, NULL, scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); + tx = async_mult(dq, dq, coef, bytes, submit); + + srcs[0] = p; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(p, srcs, 0, 2, bytes, submit); + + return tx; +} +EXPORT_SYMBOL_GPL(async_raid6_datap_recov); + +MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fe..f9cdf04fe7c 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -42,16 +42,21 @@ static void __exit async_tx_exit(void) async_dmaengine_put(); } +module_init(async_tx_init); +module_exit(async_tx_exit); + /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously - * @depend_tx: transaction dependency + * @submit: transaction dependency and submission modifiers * @tx_type: transaction type */ struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type) +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type) { + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + /* see if we can keep the chain on one channel */ if (depend_tx && dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) @@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, return async_dma_find_channel(tx_type); } EXPORT_SYMBOL_GPL(__async_tx_find_channel); -#else -static int __init async_tx_init(void) -{ - printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); - return 0; -} - -static void __exit async_tx_exit(void) -{ - do { } while (0); -} #endif @@ -83,10 +77,14 @@ static void async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *tx) { - struct dma_chan *chan; - struct dma_device *device; + struct dma_chan *chan = depend_tx->chan; + struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; + #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH + BUG(); + #endif + /* first check to see if we can still append to depend_tx */ spin_lock_bh(&depend_tx->lock); if (depend_tx->parent && depend_tx->chan == tx->chan) { @@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } spin_unlock_bh(&depend_tx->lock); - if (!intr_tx) + /* attached dependency, flush the parent channel */ + if (!intr_tx) { + device->device_issue_pending(chan); return; - - chan = depend_tx->chan; - device = chan->device; + } /* see if we can schedule an interrupt * otherwise poll for completion @@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, intr_tx->tx_submit(intr_tx); async_tx_ack(intr_tx); } + device->device_issue_pending(chan); } else { if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) panic("%s: DMA_ERROR waiting for depend_tx\n", @@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, /** - * submit_disposition - while holding depend_tx->lock we must avoid submitting - * new operations to prevent a circular locking dependency with - * drivers that already hold a channel lock when calling - * async_tx_run_dependencies. + * submit_disposition - flags for routing an incoming operation * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly + * + * while holding depend_tx->lock we must avoid submitting new operations + * to prevent a circular locking dependency with drivers that already + * hold a channel lock when calling async_tx_run_dependencies. */ enum submit_disposition { ASYNC_TX_SUBMITTED, @@ -160,11 +160,12 @@ enum submit_disposition { void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { - tx->callback = cb_fn; - tx->callback_param = cb_param; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + + tx->callback = submit->cb_fn; + tx->callback_param = submit->cb_param; if (depend_tx) { enum submit_disposition s; @@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, tx->tx_submit(tx); } - if (flags & ASYNC_TX_ACK) + if (submit->flags & ASYNC_TX_ACK) async_tx_ack(tx); - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + if (depend_tx) async_tx_ack(depend_tx); } EXPORT_SYMBOL_GPL(async_tx_submit); /** - * async_trigger_callback - schedules the callback function to be run after - * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: 'callback' requires the completion of this transaction - * @cb_fn: function to call after depend_tx completes - * @cb_param: parameter to pass to the callback routine + * async_trigger_callback - schedules the callback function to be run + * @submit: submission and completion parameters + * + * honored flags: ASYNC_TX_ACK + * + * The callback is run after any dependent operations have completed. */ struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_trigger_callback(struct async_submit_ctl *submit) { struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; if (depend_tx) { chan = depend_tx->chan; @@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags, if (tx) { pr_debug("%s: (async)\n", __func__); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; @@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx) } EXPORT_SYMBOL_GPL(async_tx_quiesce); -module_init(async_tx_init); -module_exit(async_tx_exit); - MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 90dd3f8bd28..b459a9034aa 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -33,19 +33,16 @@ /* do_async_xor - dma map the pages and perform the xor with an engine */ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; - dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; - dma_async_tx_callback _cb_fn; - void *_cb_param; - enum async_tx_flags async_flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *cb_param_orig = submit->cb_param; + enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; int xor_src_cnt; dma_addr_t dma_dest; @@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } while (src_cnt) { - async_flags = flags; + submit->flags = flags_orig; dma_flags = 0; - xor_src_cnt = min(src_cnt, dma->max_xor); + xor_src_cnt = min(src_cnt, (int)dma->max_xor); /* if we are submitting additional xors, leave the chain open, * clear the callback parameters, and leave the destination * buffer mapped */ if (src_cnt > xor_src_cnt) { - async_flags &= ~ASYNC_TX_ACK; + submit->flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_FENCE; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; - _cb_fn = NULL; - _cb_param = NULL; + submit->cb_fn = NULL; + submit->cb_param = NULL; } else { - _cb_fn = cb_fn; - _cb_param = cb_param; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; } - if (_cb_fn) + if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; - + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; /* Since we have clobbered the src_list we are committed * to doing this asynchronously. Drivers force forward progress * in case they can not provide a descriptor @@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, xor_src_cnt, len, dma_flags); if (unlikely(!tx)) - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); /* spin wait for the preceeding transactions to complete */ while (unlikely(!tx)) { @@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags); } - async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, - _cb_param); - - depend_tx = tx; - flags |= ASYNC_TX_DEP_ACK; + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; int xor_src_cnt; int src_off = 0; void *dest_buf; - void **srcs = (void **) src_list; + void **srcs; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) src_list; - /* reuse the 'src_list' array to convert to buffer pointers */ + /* convert to buffer pointers */ for (i = 0; i < src_cnt; i++) srcs[i] = page_address(src_list[i]) + offset; /* set destination address */ dest_buf = page_address(dest) + offset; - if (flags & ASYNC_TX_XOR_ZERO_DST) + if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); while (src_cnt > 0) { @@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, src_off += xor_src_cnt; } - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } /** * async_xor - attempt to xor a set of blocks with a dma engine. - * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST - * flag must be set to not include dest data in the calculation. The - * assumption with dma eninges is that they only use the destination - * buffer as a source when it is explicity specified in the source list. * @dest: destination page - * @src_list: array of source pages (if the dest is also a source it must be - * at index zero). The contents of this array may be overwritten. - * @offset: offset in pages to start transaction + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, - * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); + dma_addr_t *dma_src = NULL; + BUG_ON(src_cnt <= 1); - if (chan) { + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - flags, depend_tx, cb_fn, cb_param); + dma_src, submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); + WARN_ONCE(chan, "%s: no space for dma address conversion\n", + __func__); /* in the sync case the dest is an implied source * (assumes the dest is the first source) */ - if (flags & ASYNC_TX_XOR_DROP_DST) { + if (submit->flags & ASYNC_TX_XOR_DROP_DST) { src_cnt--; src_list++; } /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, - flags, cb_fn, cb_param); + do_sync_xor(dest, src_list, offset, src_cnt, len, submit); return NULL; } @@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) } /** - * async_xor_zero_sum - attempt a xor parity check with a dma engine. + * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously - * @src_list: array of source pages. The dest page must be listed as a source - * at index zero. The contents of this array may be overwritten. + * @src_list: array of source pages * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; + dma_addr_t *dma_src = NULL; BUG_ON(src_cnt <= 1); - if (device && src_cnt <= device->max_xor) { - dma_addr_t *dma_src = (dma_addr_t *) src_list; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) src_list; + + if (dma_src && device && src_cnt <= device->max_xor && + is_dma_xor_aligned(device, offset, 0, len)) { + unsigned long dma_prep_flags = 0; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); + if (submit->cb_fn) + dma_prep_flags |= DMA_PREP_INTERRUPT; + if (submit->flags & ASYNC_TX_FENCE) + dma_prep_flags |= DMA_PREP_FENCE; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); - tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, - len, result, - dma_prep_flags); + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, + len, result, + dma_prep_flags); if (unlikely(!tx)) { - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); while (!tx) { dma_async_issue_pending(chan); - tx = device->device_prep_dma_zero_sum(chan, + tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, len, result, dma_prep_flags); } } - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { - unsigned long xor_flags = flags; + enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); + WARN_ONCE(device && src_cnt <= device->max_xor, + "%s: no space for dma address conversion\n", + __func__); - xor_flags |= ASYNC_TX_XOR_DROP_DST; - xor_flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_XOR_DROP_DST; + submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, - depend_tx, NULL, NULL); + tx = async_xor(dest, src_list, offset, src_cnt, len, submit); async_tx_quiesce(&tx); - *result = page_is_zero(dest, offset, len) ? 0 : 1; + *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); + submit->flags = flags_orig; } return tx; } -EXPORT_SYMBOL_GPL(async_xor_zero_sum); - -static int __init async_xor_init(void) -{ - #ifdef CONFIG_ASYNC_TX_DMA - /* To conserve stack space the input src_list (array of page pointers) - * is reused to hold the array of dma addresses passed to the driver. - * This conversion is only possible when dma_addr_t is less than the - * the size of a pointer. HIGHMEM64G is known to violate this - * assumption. - */ - BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *)); - #endif - - return 0; -} - -static void __exit async_xor_exit(void) -{ - do { } while (0); -} - -module_init(async_xor_init); -module_exit(async_xor_exit); +EXPORT_SYMBOL_GPL(async_xor_val); MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c new file mode 100644 index 00000000000..3ec27c7e62e --- /dev/null +++ b/crypto/async_tx/raid6test.c @@ -0,0 +1,240 @@ +/* + * asynchronous raid6 recovery self test + * Copyright (c) 2009, Intel Corporation. + * + * based on drivers/md/raid6test/test.c: + * Copyright 2002-2007 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include <linux/async_tx.h> +#include <linux/random.h> + +#undef pr +#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) + +#define NDISKS 16 /* Including P and Q */ + +static struct page *dataptrs[NDISKS]; +static addr_conv_t addr_conv[NDISKS]; +static struct page *data[NDISKS+3]; +static struct page *spare; +static struct page *recovi; +static struct page *recovj; + +static void callback(void *param) +{ + struct completion *cmp = param; + + complete(cmp); +} + +static void makedata(int disks) +{ + int i, j; + + for (i = 0; i < disks; i++) { + for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { + u32 *p = page_address(data[i]) + j; + + *p = random32(); + } + + dataptrs[i] = data[i]; + } +} + +static char disk_type(int d, int disks) +{ + if (d == disks - 2) + return 'P'; + else if (d == disks - 1) + return 'Q'; + else + return 'D'; +} + +/* Recover two failed blocks. */ +static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) +{ + struct async_submit_ctl submit; + struct completion cmp; + struct dma_async_tx_descriptor *tx = NULL; + enum sum_check_flags result = ~0; + + if (faila > failb) + swap(faila, failb); + + if (failb == disks-1) { + if (faila == disks-2) { + /* P+Q failure. Just rebuild the syndrome. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } else { + struct page *blocks[disks]; + struct page *dest; + int count = 0; + int i; + + /* data+Q failure. Reconstruct data from P, + * then rebuild syndrome + */ + for (i = disks; i-- ; ) { + if (i == faila || i == failb) + continue; + blocks[count++] = ptrs[i]; + } + dest = ptrs[faila]; + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, addr_conv); + tx = async_xor(dest, blocks, 0, count, bytes, &submit); + + init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); + tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + } + } else { + if (failb == disks-2) { + /* data+P failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); + } else { + /* data+data failure. */ + init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); + tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); + } + } + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); + tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) + pr("%s: timeout! (faila: %d failb: %d disks: %d)\n", + __func__, faila, failb, disks); + + if (result != 0) + pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n", + __func__, faila, failb, result); +} + +static int test_disks(int i, int j, int disks) +{ + int erra, errb; + + memset(page_address(recovi), 0xf0, PAGE_SIZE); + memset(page_address(recovj), 0xba, PAGE_SIZE); + + dataptrs[i] = recovi; + dataptrs[j] = recovj; + + raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); + + erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); + errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); + + pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n", + __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks), + (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB"); + + dataptrs[i] = data[i]; + dataptrs[j] = data[j]; + + return erra || errb; +} + +static int test(int disks, int *tests) +{ + struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; + struct completion cmp; + int err = 0; + int i, j; + + recovi = data[disks]; + recovj = data[disks+1]; + spare = data[disks+2]; + + makedata(disks); + + /* Nuke syndromes */ + memset(page_address(data[disks-2]), 0xee, PAGE_SIZE); + memset(page_address(data[disks-1]), 0xee, PAGE_SIZE); + + /* Generate assumed good syndrome */ + init_completion(&cmp); + init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); + tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); + async_tx_issue_pending(tx); + + if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { + pr("error: initial gen_syndrome(%d) timed out\n", disks); + return 1; + } + + pr("testing the %d-disk case...\n", disks); + for (i = 0; i < disks-1; i++) + for (j = i+1; j < disks; j++) { + (*tests)++; + err += test_disks(i, j, disks); + } + + return err; +} + + +static int raid6_test(void) +{ + int err = 0; + int tests = 0; + int i; + + for (i = 0; i < NDISKS+3; i++) { + data[i] = alloc_page(GFP_KERNEL); + if (!data[i]) { + while (i--) + put_page(data[i]); + return -ENOMEM; + } + } + + /* the 4-disk and 5-disk cases are special for the recovery code */ + if (NDISKS > 4) + err += test(4, &tests); + if (NDISKS > 5) + err += test(5, &tests); + err += test(NDISKS, &tests); + + pr("\n"); + pr("complete (%d tests, %d failure%s)\n", + tests, err, err == 1 ? "" : "s"); + + for (i = 0; i < NDISKS+3; i++) + put_page(data[i]); + + return 0; +} + +static void raid6_test_exit(void) +{ +} + +/* when compiled-in wait for drivers to load first (assumes dma drivers + * are also compliled-in) + */ +late_initcall(raid6_test); +module_exit(raid6_test_exit); +MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests"); +MODULE_LICENSE("GPL"); diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index d295bdccc09..9335b87c517 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -115,6 +115,9 @@ static const struct file_operations acpi_button_state_fops = { .release = single_release, }; +static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); +static struct acpi_device *lid_device; + /* -------------------------------------------------------------------------- FS Interface (/proc) -------------------------------------------------------------------------- */ @@ -231,11 +234,38 @@ static int acpi_button_remove_fs(struct acpi_device *device) /* -------------------------------------------------------------------------- Driver Interface -------------------------------------------------------------------------- */ +int acpi_lid_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&acpi_lid_notifier, nb); +} +EXPORT_SYMBOL(acpi_lid_notifier_register); + +int acpi_lid_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb); +} +EXPORT_SYMBOL(acpi_lid_notifier_unregister); + +int acpi_lid_open(void) +{ + acpi_status status; + unsigned long long state; + + status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL, + &state); + if (ACPI_FAILURE(status)) + return -ENODEV; + + return !!state; +} +EXPORT_SYMBOL(acpi_lid_open); + static int acpi_lid_send_state(struct acpi_device *device) { struct acpi_button *button = acpi_driver_data(device); unsigned long long state; acpi_status status; + int ret; status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state); if (ACPI_FAILURE(status)) @@ -244,7 +274,12 @@ static int acpi_lid_send_state(struct acpi_device *device) /* input layer checks if event is redundant */ input_report_switch(button->input, SW_LID, !state); input_sync(button->input); - return 0; + + ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); + if (ret == NOTIFY_DONE) + ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, + device); + return ret; } static void acpi_button_notify(struct acpi_device *device, u32 event) @@ -366,8 +401,14 @@ static int acpi_button_add(struct acpi_device *device) error = input_register_device(input); if (error) goto err_remove_fs; - if (button->type == ACPI_BUTTON_TYPE_LID) + if (button->type == ACPI_BUTTON_TYPE_LID) { acpi_lid_send_state(device); + /* + * This assumes there's only one lid device, or if there are + * more we only care about the last one... + */ + lid_device = device; + } if (device->wakeup.flags.valid) { /* Button's GPE is run-wake GPE */ diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 56071b67bed..5633b86e3ed 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -193,7 +193,7 @@ acpi_status __init acpi_os_initialize(void) static void bind_to_cpu0(struct work_struct *work) { - set_cpus_allowed(current, cpumask_of_cpu(0)); + set_cpus_allowed_ptr(current, cpumask_of(0)); kfree(work); } diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 11088cf1031..8ba0ed0b9dd 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -511,7 +511,7 @@ int acpi_processor_preregister_performance( struct acpi_processor *match_pr; struct acpi_psd_package *match_pdomain; - if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)) return -ENOMEM; mutex_lock(&performance_mutex); @@ -558,7 +558,6 @@ int acpi_processor_preregister_performance( * Now that we have _PSD data from all CPUs, lets setup P-state * domain info. */ - cpumask_clear(covered_cpus); for_each_possible_cpu(i) { pr = per_cpu(processors, i); if (!pr) diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index ce7cf3bc510..4c6c14c1e30 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -77,7 +77,7 @@ static int acpi_processor_update_tsd_coord(void) struct acpi_tsd_package *pdomain, *match_pdomain; struct acpi_processor_throttling *pthrottling, *match_pthrottling; - if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL)) return -ENOMEM; /* @@ -105,7 +105,6 @@ static int acpi_processor_update_tsd_coord(void) if (retval) goto err_ret; - cpumask_clear(covered_cpus); for_each_possible_cpu(i) { pr = per_cpu(processors, i); if (!pr) diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 2de64065aa1..29e66d603d3 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -790,11 +790,15 @@ he_init_group(struct he_dev *he_dev, int group) he_dev->rbps_base = pci_alloc_consistent(he_dev->pci_dev, CONFIG_RBPS_SIZE * sizeof(struct he_rbp), &he_dev->rbps_phys); if (he_dev->rbps_base == NULL) { - hprintk("failed to alloc rbps\n"); - return -ENOMEM; + hprintk("failed to alloc rbps_base\n"); + goto out_destroy_rbps_pool; } memset(he_dev->rbps_base, 0, CONFIG_RBPS_SIZE * sizeof(struct he_rbp)); he_dev->rbps_virt = kmalloc(CONFIG_RBPS_SIZE * sizeof(struct he_virt), GFP_KERNEL); + if (he_dev->rbps_virt == NULL) { + hprintk("failed to alloc rbps_virt\n"); + goto out_free_rbps_base; + } for (i = 0; i < CONFIG_RBPS_SIZE; ++i) { dma_addr_t dma_handle; @@ -802,7 +806,7 @@ he_init_group(struct he_dev *he_dev, int group) cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|GFP_DMA, &dma_handle); if (cpuaddr == NULL) - return -ENOMEM; + goto out_free_rbps_virt; he_dev->rbps_virt[i].virt = cpuaddr; he_dev->rbps_base[i].status = RBP_LOANED | RBP_SMALLBUF | (i << RBP_INDEX_OFF); @@ -827,17 +831,21 @@ he_init_group(struct he_dev *he_dev, int group) CONFIG_RBPL_BUFSIZE, 8, 0); if (he_dev->rbpl_pool == NULL) { hprintk("unable to create rbpl pool\n"); - return -ENOMEM; + goto out_free_rbps_virt; } he_dev->rbpl_base = pci_alloc_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE * sizeof(struct he_rbp), &he_dev->rbpl_phys); if (he_dev->rbpl_base == NULL) { - hprintk("failed to alloc rbpl\n"); - return -ENOMEM; + hprintk("failed to alloc rbpl_base\n"); + goto out_destroy_rbpl_pool; } memset(he_dev->rbpl_base, 0, CONFIG_RBPL_SIZE * sizeof(struct he_rbp)); he_dev->rbpl_virt = kmalloc(CONFIG_RBPL_SIZE * sizeof(struct he_virt), GFP_KERNEL); + if (he_dev->rbpl_virt == NULL) { + hprintk("failed to alloc rbpl_virt\n"); + goto out_free_rbpl_base; + } for (i = 0; i < CONFIG_RBPL_SIZE; ++i) { dma_addr_t dma_handle; @@ -845,7 +853,7 @@ he_init_group(struct he_dev *he_dev, int group) cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &dma_handle); if (cpuaddr == NULL) - return -ENOMEM; + goto out_free_rbpl_virt; he_dev->rbpl_virt[i].virt = cpuaddr; he_dev->rbpl_base[i].status = RBP_LOANED | (i << RBP_INDEX_OFF); @@ -870,7 +878,7 @@ he_init_group(struct he_dev *he_dev, int group) CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), &he_dev->rbrq_phys); if (he_dev->rbrq_base == NULL) { hprintk("failed to allocate rbrq\n"); - return -ENOMEM; + goto out_free_rbpl_virt; } memset(he_dev->rbrq_base, 0, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq)); @@ -894,7 +902,7 @@ he_init_group(struct he_dev *he_dev, int group) CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), &he_dev->tbrq_phys); if (he_dev->tbrq_base == NULL) { hprintk("failed to allocate tbrq\n"); - return -ENOMEM; + goto out_free_rbpq_base; } memset(he_dev->tbrq_base, 0, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq)); @@ -906,6 +914,39 @@ he_init_group(struct he_dev *he_dev, int group) he_writel(he_dev, CONFIG_TBRQ_THRESH, G0_TBRQ_THRESH + (group * 16)); return 0; + +out_free_rbpq_base: + pci_free_consistent(he_dev->pci_dev, CONFIG_RBRQ_SIZE * + sizeof(struct he_rbrq), he_dev->rbrq_base, + he_dev->rbrq_phys); + i = CONFIG_RBPL_SIZE; +out_free_rbpl_virt: + while (--i) + pci_pool_free(he_dev->rbps_pool, he_dev->rbpl_virt[i].virt, + he_dev->rbps_base[i].phys); + kfree(he_dev->rbpl_virt); + +out_free_rbpl_base: + pci_free_consistent(he_dev->pci_dev, CONFIG_RBPL_SIZE * + sizeof(struct he_rbp), he_dev->rbpl_base, + he_dev->rbpl_phys); +out_destroy_rbpl_pool: + pci_pool_destroy(he_dev->rbpl_pool); + + i = CONFIG_RBPL_SIZE; +out_free_rbps_virt: + while (--i) + pci_pool_free(he_dev->rbpl_pool, he_dev->rbps_virt[i].virt, + he_dev->rbpl_base[i].phys); + kfree(he_dev->rbps_virt); + +out_free_rbps_base: + pci_free_consistent(he_dev->pci_dev, CONFIG_RBPS_SIZE * + sizeof(struct he_rbp), he_dev->rbps_base, + he_dev->rbps_phys); +out_destroy_rbps_pool: + pci_pool_destroy(he_dev->rbps_pool); + return -ENOMEM; } static int __devinit diff --git a/drivers/atm/solos-attrlist.c b/drivers/atm/solos-attrlist.c index efa2808dd94..1a9332e4efe 100644 --- a/drivers/atm/solos-attrlist.c +++ b/drivers/atm/solos-attrlist.c @@ -25,6 +25,10 @@ SOLOS_ATTR_RO(RSCorrectedErrorsUp) SOLOS_ATTR_RO(RSUnCorrectedErrorsUp) SOLOS_ATTR_RO(InterleaveRDn) SOLOS_ATTR_RO(InterleaveRUp) +SOLOS_ATTR_RO(BisRDn) +SOLOS_ATTR_RO(BisRUp) +SOLOS_ATTR_RO(INPdown) +SOLOS_ATTR_RO(INPup) SOLOS_ATTR_RO(ShowtimeStart) SOLOS_ATTR_RO(ATURVendor) SOLOS_ATTR_RO(ATUCCountry) @@ -62,6 +66,13 @@ SOLOS_ATTR_RW(Defaults) SOLOS_ATTR_RW(LineMode) SOLOS_ATTR_RW(Profile) SOLOS_ATTR_RW(DetectNoise) +SOLOS_ATTR_RW(BisAForceSNRMarginDn) +SOLOS_ATTR_RW(BisMForceSNRMarginDn) +SOLOS_ATTR_RW(BisAMaxMargin) +SOLOS_ATTR_RW(BisMMaxMargin) +SOLOS_ATTR_RW(AnnexAForceSNRMarginDn) +SOLOS_ATTR_RW(AnnexAMaxMargin) +SOLOS_ATTR_RW(AnnexMMaxMargin) SOLOS_ATTR_RO(SupportedAnnexes) SOLOS_ATTR_RO(Status) SOLOS_ATTR_RO(TotalStart) diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 307321b32cb..c5f5186d62a 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -59,21 +59,29 @@ #define RX_DMA_ADDR(port) (0x30 + (4 * (port))) #define DATA_RAM_SIZE 32768 -#define BUF_SIZE 4096 +#define BUF_SIZE 2048 +#define OLD_BUF_SIZE 4096 /* For FPGA versions <= 2*/ #define FPGA_PAGE 528 /* FPGA flash page size*/ #define SOLOS_PAGE 512 /* Solos flash page size*/ #define FPGA_BLOCK (FPGA_PAGE * 8) /* FPGA flash block size*/ #define SOLOS_BLOCK (SOLOS_PAGE * 8) /* Solos flash block size*/ -#define RX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2) -#define TX_BUF(card, nr) ((card->buffers) + (nr)*BUF_SIZE*2 + BUF_SIZE) +#define RX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2) +#define TX_BUF(card, nr) ((card->buffers) + (nr)*(card->buffer_size)*2 + (card->buffer_size)) +#define FLASH_BUF ((card->buffers) + 4*(card->buffer_size)*2) #define RX_DMA_SIZE 2048 +#define FPGA_VERSION(a,b) (((a) << 8) + (b)) +#define LEGACY_BUFFERS 2 +#define DMA_SUPPORTED 4 + static int reset = 0; static int atmdebug = 0; static int firmware_upgrade = 0; static int fpga_upgrade = 0; +static int db_firmware_upgrade = 0; +static int db_fpga_upgrade = 0; struct pkt_hdr { __le16 size; @@ -116,6 +124,8 @@ struct solos_card { wait_queue_head_t param_wq; wait_queue_head_t fw_wq; int using_dma; + int fpga_version; + int buffer_size; }; @@ -136,10 +146,14 @@ MODULE_PARM_DESC(reset, "Reset Solos chips on startup"); MODULE_PARM_DESC(atmdebug, "Print ATM data"); MODULE_PARM_DESC(firmware_upgrade, "Initiate Solos firmware upgrade"); MODULE_PARM_DESC(fpga_upgrade, "Initiate FPGA upgrade"); +MODULE_PARM_DESC(db_firmware_upgrade, "Initiate daughter board Solos firmware upgrade"); +MODULE_PARM_DESC(db_fpga_upgrade, "Initiate daughter board FPGA upgrade"); module_param(reset, int, 0444); module_param(atmdebug, int, 0644); module_param(firmware_upgrade, int, 0444); module_param(fpga_upgrade, int, 0444); +module_param(db_firmware_upgrade, int, 0444); +module_param(db_fpga_upgrade, int, 0444); static void fpga_queue(struct solos_card *card, int port, struct sk_buff *skb, struct atm_vcc *vcc); @@ -517,10 +531,32 @@ static int flash_upgrade(struct solos_card *card, int chip) if (chip == 0) { fw_name = "solos-FPGA.bin"; blocksize = FPGA_BLOCK; - } else { + } + + if (chip == 1) { fw_name = "solos-Firmware.bin"; blocksize = SOLOS_BLOCK; } + + if (chip == 2){ + if (card->fpga_version > LEGACY_BUFFERS){ + fw_name = "solos-db-FPGA.bin"; + blocksize = FPGA_BLOCK; + } else { + dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n"); + return -EPERM; + } + } + + if (chip == 3){ + if (card->fpga_version > LEGACY_BUFFERS){ + fw_name = "solos-Firmware.bin"; + blocksize = SOLOS_BLOCK; + } else { + dev_info(&card->dev->dev, "FPGA version doesn't support daughter board upgrades\n"); + return -EPERM; + } + } if (request_firmware(&fw, fw_name, &card->dev->dev)) return -ENOENT; @@ -536,8 +572,10 @@ static int flash_upgrade(struct solos_card *card, int chip) data32 = ioread32(card->config_regs + FPGA_MODE); /* Set mode to Chip Erase */ - dev_info(&card->dev->dev, "Set FPGA Flash mode to %s Chip Erase\n", - chip?"Solos":"FPGA"); + if(chip == 0 || chip == 2) + dev_info(&card->dev->dev, "Set FPGA Flash mode to FPGA Chip Erase\n"); + if(chip == 1 || chip == 3) + dev_info(&card->dev->dev, "Set FPGA Flash mode to Solos Chip Erase\n"); iowrite32((chip * 2), card->config_regs + FLASH_MODE); @@ -557,7 +595,10 @@ static int flash_upgrade(struct solos_card *card, int chip) /* Copy block to buffer, swapping each 16 bits */ for(i = 0; i < blocksize; i += 4) { uint32_t word = swahb32p((uint32_t *)(fw->data + offset + i)); - iowrite32(word, RX_BUF(card, 3) + i); + if(card->fpga_version > LEGACY_BUFFERS) + iowrite32(word, FLASH_BUF + i); + else + iowrite32(word, RX_BUF(card, 3) + i); } /* Specify block number and then trigger flash write */ @@ -630,6 +671,10 @@ void solos_bh(unsigned long card_arg) memcpy_fromio(header, RX_BUF(card, port), sizeof(*header)); size = le16_to_cpu(header->size); + if (size > (card->buffer_size - sizeof(*header))){ + dev_warn(&card->dev->dev, "Invalid buffer size\n"); + continue; + } skb = alloc_skb(size + 1, GFP_ATOMIC); if (!skb) { @@ -1094,12 +1139,18 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) fpga_ver = (data32 & 0x0000FFFF); major_ver = ((data32 & 0xFF000000) >> 24); minor_ver = ((data32 & 0x00FF0000) >> 16); + card->fpga_version = FPGA_VERSION(major_ver,minor_ver); + if (card->fpga_version > LEGACY_BUFFERS) + card->buffer_size = BUF_SIZE; + else + card->buffer_size = OLD_BUF_SIZE; dev_info(&dev->dev, "Solos FPGA Version %d.%02d svn-%d\n", major_ver, minor_ver, fpga_ver); - if (0 && fpga_ver > 27) + if (card->fpga_version >= DMA_SUPPORTED){ card->using_dma = 1; - else { + } else { + card->using_dma = 0; /* Set RX empty flag for all ports */ iowrite32(0xF0, card->config_regs + FLAGS_ADDR); } @@ -1131,6 +1182,12 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) if (firmware_upgrade) flash_upgrade(card, 1); + if (db_fpga_upgrade) + flash_upgrade(card, 2); + + if (db_firmware_upgrade) + flash_upgrade(card, 3); + err = atm_init(card); if (err) goto out_free_irq; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 71d1b9bab70..614da5b8613 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info, return 0; } -static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, +static int cdrom_sysctl_info(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int pos; @@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, goto done; doit: mutex_unlock(&cdrom_mutex); - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); done: printk(KERN_INFO "cdrom: info buffer too small\n"); goto doit; @@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void) mutex_unlock(&cdrom_mutex); } -static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp, +static int cdrom_sysctl_handler(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 6a06913b01d..08a6f50ae79 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -1087,6 +1087,14 @@ config MMTIMER The mmtimer device allows direct userspace access to the Altix system timer. +config UV_MMTIMER + tristate "UV_MMTIMER Memory mapped RTC for SGI UV" + depends on X86_UV + default m + help + The uv_mmtimer device allows direct userspace access to the + UV system timer. + source "drivers/char/tpm/Kconfig" config TELCLOCK diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 66f779ad4f4..19a79dd79ee 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -58,6 +58,7 @@ obj-$(CONFIG_RAW_DRIVER) += raw.o obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o obj-$(CONFIG_MSPEC) += mspec.o obj-$(CONFIG_MMTIMER) += mmtimer.o +obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o obj-$(CONFIG_VIOTAPE) += viotape.o obj-$(CONFIG_HVCS) += hvcs.o obj-$(CONFIG_IBM_BSR) += bsr.o diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 1540e693d91..4068467ce7b 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -46,6 +46,8 @@ #define PCI_DEVICE_ID_INTEL_Q35_IG 0x29B2 #define PCI_DEVICE_ID_INTEL_Q33_HB 0x29D0 #define PCI_DEVICE_ID_INTEL_Q33_IG 0x29D2 +#define PCI_DEVICE_ID_INTEL_B43_HB 0x2E40 +#define PCI_DEVICE_ID_INTEL_B43_IG 0x2E42 #define PCI_DEVICE_ID_INTEL_GM45_HB 0x2A40 #define PCI_DEVICE_ID_INTEL_GM45_IG 0x2A42 #define PCI_DEVICE_ID_INTEL_IGD_E_HB 0x2E00 @@ -91,6 +93,7 @@ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB) @@ -804,23 +807,39 @@ static void intel_i830_setup_flush(void) if (!intel_private.i8xx_page) return; - /* make page uncached */ - map_page_into_agp(intel_private.i8xx_page); - intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page); if (!intel_private.i8xx_flush_page) intel_i830_fini_flush(); } +static void +do_wbinvd(void *null) +{ + wbinvd(); +} + +/* The chipset_flush interface needs to get data that has already been + * flushed out of the CPU all the way out to main memory, because the GPU + * doesn't snoop those buffers. + * + * The 8xx series doesn't have the same lovely interface for flushing the + * chipset write buffers that the later chips do. According to the 865 + * specs, it's 64 octwords, or 1KB. So, to get those previous things in + * that buffer out, we just fill 1KB and clflush it out, on the assumption + * that it'll push whatever was in there out. It appears to work. + */ static void intel_i830_chipset_flush(struct agp_bridge_data *bridge) { unsigned int *pg = intel_private.i8xx_flush_page; - int i; - for (i = 0; i < 256; i += 2) - *(pg + i) = i; + memset(pg, 0, 1024); - wmb(); + if (cpu_has_clflush) { + clflush_cache_range(pg, 1024); + } else { + if (on_each_cpu(do_wbinvd, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for cache flush.\n"); + } } /* The intel i830 automatically initializes the agp aperture during POST. @@ -1341,6 +1360,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size) case PCI_DEVICE_ID_INTEL_Q45_HB: case PCI_DEVICE_ID_INTEL_G45_HB: case PCI_DEVICE_ID_INTEL_G41_HB: + case PCI_DEVICE_ID_INTEL_B43_HB: case PCI_DEVICE_ID_INTEL_IGDNG_D_HB: case PCI_DEVICE_ID_INTEL_IGDNG_M_HB: case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB: @@ -2335,6 +2355,8 @@ static const struct intel_driver_description { "Q45/Q43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0, "G45/G43", NULL, &intel_i965_driver }, + { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0, + "B43", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0, "G41", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0, @@ -2535,6 +2557,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_Q45_HB), ID(PCI_DEVICE_ID_INTEL_G45_HB), ID(PCI_DEVICE_ID_INTEL_G41_HB), + ID(PCI_DEVICE_ID_INTEL_B43_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB), ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB), diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c index 0a01329451e..e3dd24bff51 100644 --- a/drivers/char/bfin-otp.c +++ b/drivers/char/bfin-otp.c @@ -1,8 +1,7 @@ /* * Blackfin On-Chip OTP Memory Interface - * Supports BF52x/BF54x * - * Copyright 2007-2008 Analog Devices Inc. + * Copyright 2007-2009 Analog Devices Inc. * * Enter bugs at http://blackfin.uclinux.org/ * @@ -17,8 +16,10 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/types.h> +#include <mtd/mtd-abi.h> #include <asm/blackfin.h> +#include <asm/bfrom.h> #include <asm/uaccess.h> #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args) @@ -30,39 +31,6 @@ static DEFINE_MUTEX(bfin_otp_lock); -/* OTP Boot ROM functions */ -#define _BOOTROM_OTP_COMMAND 0xEF000018 -#define _BOOTROM_OTP_READ 0xEF00001A -#define _BOOTROM_OTP_WRITE 0xEF00001C - -static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND; -static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ; -static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE; - -/* otp_command(): defines for "command" */ -#define OTP_INIT 0x00000001 -#define OTP_CLOSE 0x00000002 - -/* otp_{read,write}(): defines for "flags" */ -#define OTP_LOWER_HALF 0x00000000 /* select upper/lower 64-bit half (bit 0) */ -#define OTP_UPPER_HALF 0x00000001 -#define OTP_NO_ECC 0x00000010 /* do not use ECC */ -#define OTP_LOCK 0x00000020 /* sets page protection bit for page */ -#define OTP_ACCESS_READ 0x00001000 -#define OTP_ACCESS_READWRITE 0x00002000 - -/* Return values for all functions */ -#define OTP_SUCCESS 0x00000000 -#define OTP_MASTER_ERROR 0x001 -#define OTP_WRITE_ERROR 0x003 -#define OTP_READ_ERROR 0x005 -#define OTP_ACC_VIO_ERROR 0x009 -#define OTP_DATA_MULT_ERROR 0x011 -#define OTP_ECC_MULT_ERROR 0x021 -#define OTP_PREV_WR_ERROR 0x041 -#define OTP_DATA_SB_WARN 0x100 -#define OTP_ECC_SB_WARN 0x200 - /** * bfin_otp_read - Read OTP pages * @@ -86,9 +54,11 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, page = *pos / (sizeof(u64) * 2); while (bytes_done < count) { flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF); - stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower")); - ret = otp_read(page, flags, &content); + stamp("processing page %i (0x%x:%s)", page, flags, + (flags & OTP_UPPER_HALF ? "upper" : "lower")); + ret = bfrom_OtpRead(page, flags, &content); if (ret & OTP_MASTER_ERROR) { + stamp("error from otp: 0x%x", ret); bytes_done = -EIO; break; } @@ -96,7 +66,7 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, bytes_done = -EFAULT; break; } - if (flags == OTP_UPPER_HALF) + if (flags & OTP_UPPER_HALF) ++page; bytes_done += sizeof(content); *pos += sizeof(content); @@ -108,14 +78,53 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, } #ifdef CONFIG_BFIN_OTP_WRITE_ENABLE +static bool allow_writes; + +/** + * bfin_otp_init_timing - setup OTP timing parameters + * + * Required before doing any write operation. Algorithms from HRM. + */ +static u32 bfin_otp_init_timing(void) +{ + u32 tp1, tp2, tp3, timing; + + tp1 = get_sclk() / 1000000; + tp2 = (2 * get_sclk() / 10000000) << 8; + tp3 = (0x1401) << 15; + timing = tp1 | tp2 | tp3; + if (bfrom_OtpCommand(OTP_INIT, timing)) + return 0; + + return timing; +} + +/** + * bfin_otp_deinit_timing - set timings to only allow reads + * + * Should be called after all writes are done. + */ +static void bfin_otp_deinit_timing(u32 timing) +{ + /* mask bits [31:15] so that any attempts to write fail */ + bfrom_OtpCommand(OTP_CLOSE, 0); + bfrom_OtpCommand(OTP_INIT, timing & ~(-1 << 15)); + bfrom_OtpCommand(OTP_CLOSE, 0); +} + /** - * bfin_otp_write - Write OTP pages + * bfin_otp_write - write OTP pages * * All writes must be in half page chunks (half page == 64 bits). */ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos) { - stampit(); + ssize_t bytes_done; + u32 timing, page, base_flags, flags, ret; + u64 content; + + if (!allow_writes) + return -EACCES; if (count % sizeof(u64)) return -EMSGSIZE; @@ -123,20 +132,96 @@ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t if (mutex_lock_interruptible(&bfin_otp_lock)) return -ERESTARTSYS; - /* need otp_init() documentation before this can be implemented */ + stampit(); + + timing = bfin_otp_init_timing(); + if (timing == 0) { + mutex_unlock(&bfin_otp_lock); + return -EIO; + } + + base_flags = OTP_CHECK_FOR_PREV_WRITE; + + bytes_done = 0; + page = *pos / (sizeof(u64) * 2); + while (bytes_done < count) { + flags = base_flags | (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF); + stamp("processing page %i (0x%x:%s) from %p", page, flags, + (flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done); + if (copy_from_user(&content, buff + bytes_done, sizeof(content))) { + bytes_done = -EFAULT; + break; + } + ret = bfrom_OtpWrite(page, flags, &content); + if (ret & OTP_MASTER_ERROR) { + stamp("error from otp: 0x%x", ret); + bytes_done = -EIO; + break; + } + if (flags & OTP_UPPER_HALF) + ++page; + bytes_done += sizeof(content); + *pos += sizeof(content); + } + + bfin_otp_deinit_timing(timing); mutex_unlock(&bfin_otp_lock); + return bytes_done; +} + +static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg) +{ + stampit(); + + switch (cmd) { + case OTPLOCK: { + u32 timing; + int ret = -EIO; + + if (!allow_writes) + return -EACCES; + + if (mutex_lock_interruptible(&bfin_otp_lock)) + return -ERESTARTSYS; + + timing = bfin_otp_init_timing(); + if (timing) { + u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL); + stamp("locking page %lu resulted in 0x%x", arg, otp_result); + if (!(otp_result & OTP_MASTER_ERROR)) + ret = 0; + + bfin_otp_deinit_timing(timing); + } + + mutex_unlock(&bfin_otp_lock); + + return ret; + } + + case MEMLOCK: + allow_writes = false; + return 0; + + case MEMUNLOCK: + allow_writes = true; + return 0; + } + return -EINVAL; } #else # define bfin_otp_write NULL +# define bfin_otp_ioctl NULL #endif static struct file_operations bfin_otp_fops = { - .owner = THIS_MODULE, - .read = bfin_otp_read, - .write = bfin_otp_write, + .owner = THIS_MODULE, + .unlocked_ioctl = bfin_otp_ioctl, + .read = bfin_otp_read, + .write = bfin_otp_write, }; static struct miscdevice bfin_otp_misc_device = { diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 4a9f3492b92..70a770ac013 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -166,9 +166,8 @@ static irqreturn_t hpet_interrupt(int irq, void *data) unsigned long m, t; t = devp->hd_ireqfreq; - m = read_counter(&devp->hd_hpet->hpet_mc); - write_counter(t + m + devp->hd_hpets->hp_delta, - &devp->hd_timer->hpet_compare); + m = read_counter(&devp->hd_timer->hpet_compare); + write_counter(t + m, &devp->hd_timer->hpet_compare); } if (devp->hd_flags & HPET_SHARED_IRQ) @@ -504,21 +503,25 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp) g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK; if (devp->hd_flags & HPET_PERIODIC) { - write_counter(t, &timer->hpet_compare); g |= Tn_TYPE_CNF_MASK; - v |= Tn_TYPE_CNF_MASK; - writeq(v, &timer->hpet_config); - v |= Tn_VAL_SET_CNF_MASK; + v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK; writeq(v, &timer->hpet_config); local_irq_save(flags); - /* NOTE: what we modify here is a hidden accumulator + /* + * NOTE: First we modify the hidden accumulator * register supported by periodic-capable comparators. * We never want to modify the (single) counter; that - * would affect all the comparators. + * would affect all the comparators. The value written + * is the counter value when the first interrupt is due. */ m = read_counter(&hpet->hpet_mc); write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare); + /* + * Then we modify the comparator, indicating the period + * for subsequent interrupt. + */ + write_counter(t, &timer->hpet_compare); } else { local_irq_save(flags); m = read_counter(&hpet->hpet_mc); diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 0aede1d6a9e..6c8b65d069e 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -690,7 +690,7 @@ static ssize_t read_zero(struct file * file, char __user * buf, if (chunk > PAGE_SIZE) chunk = PAGE_SIZE; /* Just for latency reasons */ - unwritten = clear_user(buf, chunk); + unwritten = __clear_user(buf, chunk); written += chunk - unwritten; if (unwritten) break; diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index 94ad2c3bfc4..a4ec50c9507 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c @@ -281,12 +281,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, case IOCTL_MW_REGISTER_IPC: { unsigned int ipcnum = (unsigned int) ioarg; - PRINTK_3(TRACE_MWAVE, - "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" - " ipcnum %x entry usIntCount %x\n", - ipcnum, - pDrvData->IPCs[ipcnum].usIntCount); - if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:" @@ -295,6 +289,12 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, ipcnum); return -EINVAL; } + PRINTK_3(TRACE_MWAVE, + "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" + " ipcnum %x entry usIntCount %x\n", + ipcnum, + pDrvData->IPCs[ipcnum].usIntCount); + lock_kernel(); pDrvData->IPCs[ipcnum].bIsHere = FALSE; pDrvData->IPCs[ipcnum].bIsEnabled = TRUE; @@ -310,11 +310,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, case IOCTL_MW_GET_IPC: { unsigned int ipcnum = (unsigned int) ioarg; - PRINTK_3(TRACE_MWAVE, - "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" - " ipcnum %x, usIntCount %x\n", - ipcnum, - pDrvData->IPCs[ipcnum].usIntCount); if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:" @@ -322,6 +317,11 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, " Invalid ipcnum %x\n", ipcnum); return -EINVAL; } + PRINTK_3(TRACE_MWAVE, + "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" + " ipcnum %x, usIntCount %x\n", + ipcnum, + pDrvData->IPCs[ipcnum].usIntCount); lock_kernel(); if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { diff --git a/drivers/char/random.c b/drivers/char/random.c index d8a9255e1a3..04b505e5a5e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1231,7 +1231,7 @@ static char sysctl_bootid[16]; * as an ASCII string in the standard UUID format. If accesses via the * sysctl system call, it is returned as 16 bytes of binary data. */ -static int proc_do_uuid(ctl_table *table, int write, struct file *filp, +static int proc_do_uuid(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { ctl_table fake_table; @@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp, fake_table.data = buf; fake_table.maxlen = sizeof(buf); - return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos); + return proc_dostring(&fake_table, write, buffer, lenp, ppos); } static int uuid_strategy(ctl_table *table, diff --git a/drivers/char/rio/rioctrl.c b/drivers/char/rio/rioctrl.c index eecee0f576d..74339559f0b 100644 --- a/drivers/char/rio/rioctrl.c +++ b/drivers/char/rio/rioctrl.c @@ -873,7 +873,7 @@ int riocontrol(struct rio_info *p, dev_t dev, int cmd, unsigned long arg, int su /* ** It is important that the product code is an unsigned object! */ - if (DownLoad.ProductCode > MAX_PRODUCT) { + if (DownLoad.ProductCode >= MAX_PRODUCT) { rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode); p->RIOError.Error = NO_SUCH_PRODUCT; return -ENXIO; diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 32b957efa42..45d58002b06 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -742,7 +742,7 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read); * the module usage count. */ #define TPM_ORD_PCR_EXTEND cpu_to_be32(20) -#define EXTEND_PCR_SIZE 34 +#define EXTEND_PCR_RESULT_SIZE 34 static struct tpm_input_header pcrextend_header = { .tag = TPM_TAG_RQU_COMMAND, .length = cpu_to_be32(34), @@ -760,10 +760,9 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) return -ENODEV; cmd.header.in = pcrextend_header; - BUG_ON(be32_to_cpu(cmd.header.in.length) > EXTEND_PCR_SIZE); cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx); memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE); - rc = transmit_cmd(chip, &cmd, cmd.header.in.length, + rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, "attempting extend a PCR value"); module_put(chip->dev->driver->owner); diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c new file mode 100644 index 00000000000..867b67be9f0 --- /dev/null +++ b/drivers/char/uv_mmtimer.c @@ -0,0 +1,216 @@ +/* + * Timer device implementation for SGI UV platform. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2009 Silicon Graphics, Inc. All rights reserved. + * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/ioctl.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/mmtimer.h> +#include <linux/miscdevice.h> +#include <linux/posix-timers.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/math64.h> +#include <linux/smp_lock.h> + +#include <asm/genapic.h> +#include <asm/uv/uv_hub.h> +#include <asm/uv/bios.h> +#include <asm/uv/uv.h> + +MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>"); +MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer"); +MODULE_LICENSE("GPL"); + +/* name of the device, usually in /dev */ +#define UV_MMTIMER_NAME "mmtimer" +#define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer" +#define UV_MMTIMER_VERSION "1.0" + +static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma); + +/* + * Period in femtoseconds (10^-15 s) + */ +static unsigned long uv_mmtimer_femtoperiod; + +static const struct file_operations uv_mmtimer_fops = { + .owner = THIS_MODULE, + .mmap = uv_mmtimer_mmap, + .unlocked_ioctl = uv_mmtimer_ioctl, +}; + +/** + * uv_mmtimer_ioctl - ioctl interface for /dev/uv_mmtimer + * @file: file structure for the device + * @cmd: command to execute + * @arg: optional argument to command + * + * Executes the command specified by @cmd. Returns 0 for success, < 0 for + * failure. + * + * Valid commands: + * + * %MMTIMER_GETOFFSET - Should return the offset (relative to the start + * of the page where the registers are mapped) for the counter in question. + * + * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15) + * seconds + * + * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address + * specified by @arg + * + * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter + * + * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace + * + * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it + * in the address specified by @arg. + */ +static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case MMTIMER_GETOFFSET: /* offset of the counter */ + /* + * UV RTC register is on its own page + */ + if (PAGE_SIZE <= (1 << 16)) + ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1)) + / 8; + else + ret = -ENOSYS; + break; + + case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */ + if (copy_to_user((unsigned long __user *)arg, + &uv_mmtimer_femtoperiod, sizeof(unsigned long))) + ret = -EFAULT; + break; + + case MMTIMER_GETFREQ: /* frequency in Hz */ + if (copy_to_user((unsigned long __user *)arg, + &sn_rtc_cycles_per_second, + sizeof(unsigned long))) + ret = -EFAULT; + break; + + case MMTIMER_GETBITS: /* number of bits in the clock */ + ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK); + break; + + case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */ + ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0; + break; + + case MMTIMER_GETCOUNTER: + if (copy_to_user((unsigned long __user *)arg, + (unsigned long *)uv_local_mmr_address(UVH_RTC), + sizeof(unsigned long))) + ret = -EFAULT; + break; + default: + ret = -ENOTTY; + break; + } + return ret; +} + +/** + * uv_mmtimer_mmap - maps the clock's registers into userspace + * @file: file structure for the device + * @vma: VMA to map the registers into + * + * Calls remap_pfn_range() to map the clock's registers into + * the calling process' address space. + */ +static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long uv_mmtimer_addr; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + if (PAGE_SIZE > (1 << 16)) + return -ENOSYS; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + uv_mmtimer_addr = UV_LOCAL_MMR_BASE | UVH_RTC; + uv_mmtimer_addr &= ~(PAGE_SIZE - 1); + uv_mmtimer_addr &= 0xfffffffffffffffUL; + + if (remap_pfn_range(vma, vma->vm_start, uv_mmtimer_addr >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot)) { + printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n"); + return -EAGAIN; + } + + return 0; +} + +static struct miscdevice uv_mmtimer_miscdev = { + MISC_DYNAMIC_MINOR, + UV_MMTIMER_NAME, + &uv_mmtimer_fops +}; + + +/** + * uv_mmtimer_init - device initialization routine + * + * Does initial setup for the uv_mmtimer device. + */ +static int __init uv_mmtimer_init(void) +{ + if (!is_uv_system()) { + printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME); + return -1; + } + + /* + * Sanity check the cycles/sec variable + */ + if (sn_rtc_cycles_per_second < 100000) { + printk(KERN_ERR "%s: unable to determine clock frequency\n", + UV_MMTIMER_NAME); + return -1; + } + + uv_mmtimer_femtoperiod = ((unsigned long)1E15 + + sn_rtc_cycles_per_second / 2) / + sn_rtc_cycles_per_second; + + if (misc_register(&uv_mmtimer_miscdev)) { + printk(KERN_ERR "%s: failed to register device\n", + UV_MMTIMER_NAME); + return -1; + } + + printk(KERN_INFO "%s: v%s, %ld MHz\n", UV_MMTIMER_DESC, + UV_MMTIMER_VERSION, + sn_rtc_cycles_per_second/(unsigned long)1E6); + + return 0; +} + +module_init(uv_mmtimer_init); diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index 25b743abfb5..52e6bb70a49 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -28,7 +28,7 @@ #include <linux/device.h> #include <linux/dca.h> -#define DCA_VERSION "1.8" +#define DCA_VERSION "1.12.1" MODULE_VERSION(DCA_VERSION); MODULE_LICENSE("GPL"); @@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation"); static DEFINE_SPINLOCK(dca_lock); -static LIST_HEAD(dca_providers); +static LIST_HEAD(dca_domains); -static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +static struct pci_bus *dca_pci_rc_from_dev(struct device *dev) { - struct dca_provider *dca, *ret = NULL; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; - list_for_each_entry(dca, &dca_providers, node) { - if ((!dev) || (dca->ops->dev_managed(dca, dev))) { - ret = dca; - break; - } + while (bus->parent) + bus = bus->parent; + + return bus; +} + +static struct dca_domain *dca_allocate_domain(struct pci_bus *rc) +{ + struct dca_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_NOWAIT); + if (!domain) + return NULL; + + INIT_LIST_HEAD(&domain->dca_providers); + domain->pci_rc = rc; + + return domain; +} + +static void dca_free_domain(struct dca_domain *domain) +{ + list_del(&domain->node); + kfree(domain); +} + +static struct dca_domain *dca_find_domain(struct pci_bus *rc) +{ + struct dca_domain *domain; + + list_for_each_entry(domain, &dca_domains, node) + if (domain->pci_rc == rc) + return domain; + + return NULL; +} + +static struct dca_domain *dca_get_domain(struct device *dev) +{ + struct pci_bus *rc; + struct dca_domain *domain; + + rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(rc); + + if (!domain) { + domain = dca_allocate_domain(rc); + if (domain) + list_add(&domain->node, &dca_domains); + } + + return domain; +} + +static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +{ + struct dca_provider *dca; + struct pci_bus *rc; + struct dca_domain *domain; + + if (dev) { + rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(rc); + if (!domain) + return NULL; + } else { + if (!list_empty(&dca_domains)) + domain = list_first_entry(&dca_domains, + struct dca_domain, + node); + else + return NULL; } - return ret; + list_for_each_entry(dca, &domain->dca_providers, node) + if ((!dev) || (dca->ops->dev_managed(dca, dev))) + return dca; + + return NULL; } /** @@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev) struct dca_provider *dca; int err, slot = -ENODEV; unsigned long flags; + struct pci_bus *pci_rc; + struct dca_domain *domain; if (!dev) return -EFAULT; @@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev) return -EEXIST; } - list_for_each_entry(dca, &dca_providers, node) { + pci_rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(pci_rc); + if (!domain) { + spin_unlock_irqrestore(&dca_lock, flags); + return -ENODEV; + } + + list_for_each_entry(dca, &domain->dca_providers, node) { slot = dca->ops->add_requester(dca, dev); if (slot >= 0) break; @@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) { int err; unsigned long flags; + struct dca_domain *domain; err = dca_sysfs_add_provider(dca, dev); if (err) return err; spin_lock_irqsave(&dca_lock, flags); - list_add(&dca->node, &dca_providers); + domain = dca_get_domain(dev); + if (!domain) { + spin_unlock_irqrestore(&dca_lock, flags); + return -ENODEV; + } + list_add(&dca->node, &domain->dca_providers); spin_unlock_irqrestore(&dca_lock, flags); blocking_notifier_call_chain(&dca_provider_chain, @@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider); * unregister_dca_provider - remove a dca provider * @dca - struct created by alloc_dca_provider() */ -void unregister_dca_provider(struct dca_provider *dca) +void unregister_dca_provider(struct dca_provider *dca, struct device *dev) { unsigned long flags; + struct pci_bus *pci_rc; + struct dca_domain *domain; blocking_notifier_call_chain(&dca_provider_chain, DCA_PROVIDER_REMOVE, NULL); spin_lock_irqsave(&dca_lock, flags); + list_del(&dca->node); + + pci_rc = dca_pci_rc_from_dev(dev); + domain = dca_find_domain(pci_rc); + if (list_empty(&domain->dca_providers)) + dca_free_domain(domain); + spin_unlock_irqrestore(&dca_lock, flags); dca_sysfs_remove_provider(dca); @@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify); static int __init dca_init(void) { - printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION); + pr_info("dca service started, version %s\n", DCA_VERSION); return dca_sysfs_init(); } diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 81e1020fb51..5903a88351b 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -17,11 +17,15 @@ if DMADEVICES comment "DMA Devices" +config ASYNC_TX_DISABLE_CHANNEL_SWITCH + bool + config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 select DMA_ENGINE select DCA + select ASYNC_TX_DISABLE_CHANNEL_SWITCH help Enable support for the Intel(R) I/OAT DMA engine present in recent Intel Xeon chipsets. @@ -97,6 +101,14 @@ config TXX9_DMAC Support the TXx9 SoC internal DMA controller. This can be integrated in chips such as the Toshiba TX4927/38/39. +config SH_DMAE + tristate "Renesas SuperH DMAC support" + depends on SUPERH && SH_DMA + depends on !SH_DMA_API + select DMA_ENGINE + help + Enable support for the Renesas SuperH DMA controllers. + config DMA_ENGINE bool @@ -116,7 +128,7 @@ config NET_DMA config ASYNC_TX_DMA bool "Async_tx: Offload support for the async_tx api" - depends on DMA_ENGINE && !HIGHMEM64G + depends on DMA_ENGINE help This allows the async_tx api to take advantage of offload engines for memcpy, memset, xor, and raid6 p+q operations. If your platform has diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 40e1e008357..eca71ba78ae 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,8 +1,7 @@ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_NET_DMA) += iovlock.o obj-$(CONFIG_DMATEST) += dmatest.o -obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_FSL_DMA) += fsldma.o obj-$(CONFIG_MV_XOR) += mv_xor.o @@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o obj-$(CONFIG_AT_HDMAC) += at_hdmac.o obj-$(CONFIG_MX3_IPU) += ipu/ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_SH_DMAE) += shdma.o diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index c8522e6f1ad..7585c4164bd 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); if (desc) { memset(desc, 0, sizeof(struct at_desc)); + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, chan); /* txd.flags will be overwritten in prep functions */ desc->txd.flags = DMA_CTRL_ACK; @@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) struct at_desc *child; spin_lock_bh(&atchan->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&atchan->chan_common), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &atchan->free_list); + list_splice_init(&desc->tx_list, &atchan->free_list); dev_vdbg(chan2dev(&atchan->chan_common), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &atchan->free_list); @@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) param = txd->callback_param; /* move children to free_list */ - list_splice_init(&txd->tx_list, &atchan->free_list); + list_splice_init(&desc->tx_list, &atchan->free_list); /* move myself to free_list */ list_move(&desc->desc_node, &atchan->free_list); /* unmap dma addresses */ - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&atchan->chan_common), - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - else - dma_unmap_page(chan2parent(&atchan->chan_common), - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - } - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&atchan->chan_common), - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); - else - dma_unmap_page(chan2parent(&atchan->chan_common), - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); + if (!atchan->chan_common.private) { + struct device *parent = chan2parent(&atchan->chan_common); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, + desc->lli.daddr, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, + desc->lli.saddr, + desc->len, DMA_TO_DEVICE); + } } /* @@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan) /* This one is currently in progress */ return; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (!(child->lli.ctrla & ATC_DONE)) /* Currently in progress */ return; @@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan) dev_crit(chan2dev(&atchan->chan_common), " cookie: %d\n", bad_desc->txd.cookie); atc_dump_lli(atchan, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) atc_dump_lli(atchan, &child->lli); /* Pretend the descriptor completed successfully */ @@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; } @@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = atslave->reg_width; - sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); - ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; @@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->lli.dscr = desc->txd.phys; /* insert the link descriptor to the LD ring */ list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 4c972afc49e..495457e3dc4 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -165,6 +165,7 @@ struct at_desc { struct at_lli lli; /* THEN values for driver housekeeping */ + struct list_head tx_list; struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 5a87384ea4f..bd0b248de2c 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -608,6 +608,40 @@ void dmaengine_put(void) } EXPORT_SYMBOL(dmaengine_put); +static bool device_has_all_tx_types(struct dma_device *device) +{ + /* A device that satisfies this test has channels that will never cause + * an async_tx channel switch event as all possible operation types can + * be handled. + */ + #ifdef CONFIG_ASYNC_TX_DMA + if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) + if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) + if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) + if (!dma_has_cap(DMA_XOR, device->cap_mask)) + return false; + #endif + + #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) + if (!dma_has_cap(DMA_PQ, device->cap_mask)) + return false; + #endif + + return true; +} + static int get_dma_id(struct dma_device *device) { int rc; @@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_memcpy); BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor); - BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && - !device->device_prep_dma_zero_sum); + BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && + !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && @@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device) BUG_ON(!device->device_issue_pending); BUG_ON(!device->dev); + /* note: this only matters in the + * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case + */ + if (device_has_all_tx_types(device)) + dma_cap_set(DMA_ASYNC_TX, device->cap_mask); + idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); if (!idr_ref) return -ENOMEM; @@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, { tx->chan = chan; spin_lock_init(&tx->lock); - INIT_LIST_HEAD(&tx->tx_list); } EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. */ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index d93017fc787..a32a4cf7b1e 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO); MODULE_PARM_DESC(xor_sources, "Number of xor source buffers (default: 3)"); +static unsigned int pq_sources = 3; +module_param(pq_sources, uint, S_IRUGO); +MODULE_PARM_DESC(pq_sources, + "Number of p+q source buffers (default: 3)"); + /* * Initialization patterns. All bytes in the source buffer has bit 7 * set, all bytes in the destination buffer has bit 7 cleared. @@ -232,6 +237,7 @@ static int dmatest_func(void *data) dma_cookie_t cookie; enum dma_status status; enum dma_ctrl_flags flags; + u8 pq_coefs[pq_sources]; int ret; int src_cnt; int dst_cnt; @@ -248,6 +254,11 @@ static int dmatest_func(void *data) else if (thread->type == DMA_XOR) { src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ dst_cnt = 1; + } else if (thread->type == DMA_PQ) { + src_cnt = pq_sources | 1; /* force odd to ensure dst = src */ + dst_cnt = 2; + for (i = 0; i < pq_sources; i++) + pq_coefs[i] = 1; } else goto err_srcs; @@ -283,6 +294,7 @@ static int dmatest_func(void *data) dma_addr_t dma_dsts[dst_cnt]; struct completion cmp; unsigned long tmo = msecs_to_jiffies(3000); + u8 align = 0; total_tests++; @@ -290,6 +302,18 @@ static int dmatest_func(void *data) src_off = dmatest_random() % (test_buf_size - len + 1); dst_off = dmatest_random() % (test_buf_size - len + 1); + /* honor alignment restrictions */ + if (thread->type == DMA_MEMCPY) + align = dev->copy_align; + else if (thread->type == DMA_XOR) + align = dev->xor_align; + else if (thread->type == DMA_PQ) + align = dev->pq_align; + + len = (len >> align) << align; + src_off = (src_off >> align) << align; + dst_off = (dst_off >> align) << align; + dmatest_init_srcs(thread->srcs, src_off, len); dmatest_init_dsts(thread->dsts, dst_off, len); @@ -306,6 +330,7 @@ static int dmatest_func(void *data) DMA_BIDIRECTIONAL); } + if (thread->type == DMA_MEMCPY) tx = dev->device_prep_dma_memcpy(chan, dma_dsts[0] + dst_off, @@ -316,6 +341,15 @@ static int dmatest_func(void *data) dma_dsts[0] + dst_off, dma_srcs, xor_sources, len, flags); + else if (thread->type == DMA_PQ) { + dma_addr_t dma_pq[dst_cnt]; + + for (i = 0; i < dst_cnt; i++) + dma_pq[i] = dma_dsts[i] + dst_off; + tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs, + pq_sources, pq_coefs, + len, flags); + } if (!tx) { for (i = 0; i < src_cnt; i++) @@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty op = "copy"; else if (type == DMA_XOR) op = "xor"; + else if (type == DMA_PQ) + op = "pq"; else return -EINVAL; @@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan) cnt = dmatest_add_threads(dtc, DMA_XOR); thread_count += cnt > 0 ? cnt : 0; } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + cnt = dmatest_add_threads(dtc, DMA_PQ); + thread_count += cnt > 0 ?: 0; + } pr_info("dmatest: Started %u threads using %s\n", thread_count, dma_chan_name(chan)); diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 933c143b6a7..2eea823516a 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) { struct dw_desc *child; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dma_sync_single_for_cpu(chan2parent(&dwc->chan), child->txd.phys, sizeof(child->lli), DMA_TO_DEVICE); @@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) dwc_sync_desc_for_cpu(dwc, desc); spin_lock_bh(&dwc->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dwc->chan), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &dwc->free_list); + list_splice_init(&desc->tx_list, &dwc->free_list); dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dwc->free_list); spin_unlock_bh(&dwc->lock); @@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) param = txd->callback_param; dwc_sync_desc_for_cpu(dwc, desc); - list_splice_init(&txd->tx_list, &dwc->free_list); + list_splice_init(&desc->tx_list, &dwc->free_list); list_move(&desc->desc_node, &dwc->free_list); - /* - * We use dma_unmap_page() regardless of how the buffers were - * mapped before they were submitted... - */ - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) - dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar, - desc->len, DMA_FROM_DEVICE); - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) - dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar, - desc->len, DMA_TO_DEVICE); + if (!dwc->chan.private) { + struct device *parent = chan2parent(&dwc->chan); + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + else + dma_unmap_page(parent, desc->lli.dar, + desc->len, DMA_FROM_DEVICE); + } + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + else + dma_unmap_page(parent, desc->lli.sar, + desc->len, DMA_TO_DEVICE); + } + } /* * The API requires that no submissions are done from a @@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) /* This one is currently in progress */ return; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (child->lli.llp == llp) /* Currently in progress */ return; @@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) dev_printk(KERN_CRIT, chan2dev(&dwc->chan), " cookie: %d\n", bad_desc->txd.cookie); dwc_dump_lli(dwc, &bad_desc->lli); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) dwc_dump_lli(dwc, &child->lli); /* Pretend the descriptor completed successfully */ @@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, prev->txd.phys, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; } @@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = dws->reg_width; prev = first = NULL; - sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); - switch (direction) { case DMA_TO_DEVICE: ctllo = (DWC_DEFAULT_CTLLO @@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, sizeof(prev->lli), DMA_TO_DEVICE); list_add_tail(&desc->desc_node, - &first->txd.tx_list); + &first->tx_list); } prev = desc; total_len += len; @@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) break; } + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, chan); desc->txd.tx_submit = dwc_tx_submit; desc->txd.flags = DMA_CTRL_ACK; diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 13a58076703..d9a939f67f4 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -217,6 +217,7 @@ struct dw_desc { /* THEN values for driver housekeeping */ struct list_head desc_node; + struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; }; diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index ef87a898414..296f9e747fa 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -34,6 +34,7 @@ #include <linux/dmapool.h> #include <linux/of_platform.h> +#include <asm/fsldma.h> #include "fsldma.h" static void dma_init(struct fsl_dma_chan *fsl_chan) @@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size) } /** - * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * fsl_chan_set_request_count - Set DMA Request Count for external control * @fsl_chan : Freescale DMA channel - * @size : Pause control size, 0 for disable external pause control. - * The maximum is 1024. + * @size : Number of bytes to transfer in a single request + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA request count is how many bytes are allowed to transfer before + * pausing the channel, after which a new assertion of DREQ# resumes channel + * operation. * - * The Freescale DMA channel can be controlled by the external - * signal DREQ#. The pause control size is how many bytes are allowed - * to transfer before pausing the channel, after which a new assertion - * of DREQ# resumes channel operation. + * A size of 0 disables external pause control. The maximum size is 1024. */ -static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size) +static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size) { - if (size > 1024) - return; + BUG_ON(size > 1024); + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) + | ((__ilog2(size) << 24) & 0x0f000000), + 32); +} - if (size) { - DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, - DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) - | ((__ilog2(size) << 24) & 0x0f000000), - 32); +/** + * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * @fsl_chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * The Freescale DMA channel can be controlled by the external signal DREQ#. + * The DMA Request Count feature should be used in addition to this feature + * to set the number of bytes to transfer before pausing the channel. + */ +static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable) +{ + if (enable) fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; - } else + else fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; } @@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable) static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) { struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); - struct fsl_desc_sw *desc; + struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); + struct fsl_desc_sw *child; unsigned long flags; dma_cookie_t cookie; @@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&fsl_chan->desc_lock, flags); cookie = fsl_chan->common.cookie; - list_for_each_entry(desc, &tx->tx_list, node) { + list_for_each_entry(child, &desc->tx_list, node) { cookie++; if (cookie < 0) cookie = 1; @@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) } fsl_chan->common.cookie = cookie; - append_ld_queue(fsl_chan, tx_to_fsl_desc(tx)); - list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev); + append_ld_queue(fsl_chan, desc); + list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev); spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); @@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); if (desc_sw) { memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); + INIT_LIST_HEAD(&desc_sw->tx_list); dma_async_tx_descriptor_init(&desc_sw->async_tx, &fsl_chan->common); desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; @@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags) new->async_tx.flags = flags; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &new->async_tx.tx_list); + list_add_tail(&new->node, &new->tx_list); /* Set End-of-link to the last link descriptor of new list*/ set_ld_eol(fsl_chan, new); @@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( dma_dest += copy; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->async_tx.tx_list); + list_add_tail(&new->node, &first->tx_list); } while (len); new->async_tx.flags = flags; /* client is in control of this ack */ @@ -528,7 +543,7 @@ fail: if (!first) return NULL; - list = &first->async_tx.tx_list; + list = &first->tx_list; list_for_each_entry_safe_reverse(new, prev, list, node) { list_del(&new->node); dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); @@ -538,6 +553,229 @@ fail: } /** + * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction + * @chan: DMA channel + * @sgl: scatterlist to transfer to/from + * @sg_len: number of entries in @scatterlist + * @direction: DMA direction + * @flags: DMAEngine flags + * + * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the + * DMA_SLAVE API, this gets the device-specific information from the + * chan->private variable. + */ +static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_data_direction direction, unsigned long flags) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL; + struct fsl_dma_slave *slave; + struct list_head *tx_list; + size_t copy; + + int i; + struct scatterlist *sg; + size_t sg_used; + size_t hw_used; + struct fsl_dma_hw_addr *hw; + dma_addr_t dma_dst, dma_src; + + if (!chan) + return NULL; + + if (!chan->private) + return NULL; + + fsl_chan = to_fsl_chan(chan); + slave = chan->private; + + if (list_empty(&slave->addresses)) + return NULL; + + hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry); + hw_used = 0; + + /* + * Build the hardware transaction to copy from the scatterlist to + * the hardware, or from the hardware to the scatterlist + * + * If you are copying from the hardware to the scatterlist and it + * takes two hardware entries to fill an entire page, then both + * hardware entries will be coalesced into the same page + * + * If you are copying from the scatterlist to the hardware and a + * single page can fill two hardware entries, then the data will + * be read out of the page into the first hardware entry, and so on + */ + for_each_sg(sgl, sg, sg_len, i) { + sg_used = 0; + + /* Loop until the entire scatterlist entry is used */ + while (sg_used < sg_dma_len(sg)) { + + /* + * If we've used up the current hardware address/length + * pair, we need to load a new one + * + * This is done in a while loop so that descriptors with + * length == 0 will be skipped + */ + while (hw_used >= hw->length) { + + /* + * If the current hardware entry is the last + * entry in the list, we're finished + */ + if (list_is_last(&hw->entry, &slave->addresses)) + goto finished; + + /* Get the next hardware address/length pair */ + hw = list_entry(hw->entry.next, + struct fsl_dma_hw_addr, entry); + hw_used = 0; + } + + /* Allocate the link descriptor from DMA pool */ + new = fsl_dma_alloc_descriptor(fsl_chan); + if (!new) { + dev_err(fsl_chan->dev, "No free memory for " + "link descriptor\n"); + goto fail; + } +#ifdef FSL_DMA_LD_DEBUG + dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new); +#endif + + /* + * Calculate the maximum number of bytes to transfer, + * making sure it is less than the DMA controller limit + */ + copy = min_t(size_t, sg_dma_len(sg) - sg_used, + hw->length - hw_used); + copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT); + + /* + * DMA_FROM_DEVICE + * from the hardware to the scatterlist + * + * DMA_TO_DEVICE + * from the scatterlist to the hardware + */ + if (direction == DMA_FROM_DEVICE) { + dma_src = hw->address + hw_used; + dma_dst = sg_dma_address(sg) + sg_used; + } else { + dma_src = sg_dma_address(sg) + sg_used; + dma_dst = hw->address + hw_used; + } + + /* Fill in the descriptor */ + set_desc_cnt(fsl_chan, &new->hw, copy); + set_desc_src(fsl_chan, &new->hw, dma_src); + set_desc_dest(fsl_chan, &new->hw, dma_dst); + + /* + * If this is not the first descriptor, chain the + * current descriptor after the previous descriptor + */ + if (!first) { + first = new; + } else { + set_desc_next(fsl_chan, &prev->hw, + new->async_tx.phys); + } + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + sg_used += copy; + hw_used += copy; + + /* Insert the link descriptor into the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } + } + +finished: + + /* All of the hardware address/length pairs had length == 0 */ + if (!first || !new) + return NULL; + + new->async_tx.flags = flags; + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list */ + set_ld_eol(fsl_chan, new); + + /* Enable extra controller features */ + if (fsl_chan->set_src_loop_size) + fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size); + + if (fsl_chan->set_dest_loop_size) + fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size); + + if (fsl_chan->toggle_ext_start) + fsl_chan->toggle_ext_start(fsl_chan, slave->external_start); + + if (fsl_chan->toggle_ext_pause) + fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause); + + if (fsl_chan->set_request_count) + fsl_chan->set_request_count(fsl_chan, slave->request_count); + + return &first->async_tx; + +fail: + /* If first was not set, then we failed to allocate the very first + * descriptor, and we're done */ + if (!first) + return NULL; + + /* + * First is set, so all of the descriptors we allocated have been added + * to first->tx_list, INCLUDING "first" itself. Therefore we + * must traverse the list backwards freeing each descriptor in turn + * + * We're re-using variables for the loop, oh well + */ + tx_list = &first->tx_list; + list_for_each_entry_safe_reverse(new, prev, tx_list, node) { + list_del_init(&new->node); + dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); + } + + return NULL; +} + +static void fsl_dma_device_terminate_all(struct dma_chan *chan) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *desc, *tmp; + unsigned long flags; + + if (!chan) + return; + + fsl_chan = to_fsl_chan(chan); + + /* Halt the DMA engine */ + dma_halt(fsl_chan); + + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + + /* Remove and free all of the descriptors in the LD queue */ + list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) { + list_del(&desc->node); + dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys); + } + + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); +} + +/** * fsl_dma_update_completed_cookie - Update the completed cookie. * @fsl_chan : Freescale DMA channel */ @@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; + new_fsl_chan->set_request_count = fsl_chan_set_request_count; } spin_lock_init(&new_fsl_chan->desc_lock); @@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); + dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; fdev->common.device_is_tx_complete = fsl_dma_is_complete; fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; + fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg; + fdev->common.device_terminate_all = fsl_dma_device_terminate_all; fdev->common.dev = &dev->dev; fdev->irq = irq_of_parse_and_map(dev->node, 0); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index dc7f2686579..0df14cbb8ca 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -90,6 +90,7 @@ struct fsl_dma_ld_hw { struct fsl_desc_sw { struct fsl_dma_ld_hw hw; struct list_head node; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; struct list_head *ld; void *priv; @@ -143,10 +144,11 @@ struct fsl_dma_chan { struct tasklet_struct tasklet; u32 feature; - void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size); + void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable); void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); + void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size); }; #define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c deleted file mode 100644 index 2225bb6ba3d..00000000000 --- a/drivers/dma/ioat.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dca.h> -#include "ioatdma.h" -#include "ioatdma_registers.h" -#include "ioatdma_hw.h" - -MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Intel Corporation"); - -static struct pci_device_id ioat_pci_tbl[] = { - /* I/OAT v1 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, - - /* I/OAT v2 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, - - /* I/OAT v3 platforms */ - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, - { 0, } -}; - -struct ioat_device { - struct pci_dev *pdev; - void __iomem *iobase; - struct ioatdma_device *dma; - struct dca_provider *dca; -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id); -static void __devexit ioat_remove(struct pci_dev *pdev); - -static int ioat_dca_enabled = 1; -module_param(ioat_dca_enabled, int, 0644); -MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); - -static struct pci_driver ioat_pci_driver = { - .name = "ioatdma", - .id_table = ioat_pci_tbl, - .probe = ioat_probe, - .remove = __devexit_p(ioat_remove), -}; - -static int __devinit ioat_probe(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - void __iomem *iobase; - struct ioat_device *device; - unsigned long mmio_start, mmio_len; - int err; - - err = pci_enable_device(pdev); - if (err) - goto err_enable_device; - - err = pci_request_regions(pdev, ioat_pci_driver.name); - if (err) - goto err_request_regions; - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - goto err_set_dma_mask; - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - goto err_set_dma_mask; - - mmio_start = pci_resource_start(pdev, 0); - mmio_len = pci_resource_len(pdev, 0); - iobase = ioremap(mmio_start, mmio_len); - if (!iobase) { - err = -ENOMEM; - goto err_ioremap; - } - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } - device->pdev = pdev; - pci_set_drvdata(pdev, device); - device->iobase = iobase; - - pci_set_master(pdev); - - switch (readb(iobase + IOAT_VER_OFFSET)) { - case IOAT_VER_1_2: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat_dca_init(pdev, iobase); - break; - case IOAT_VER_2_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat2_dca_init(pdev, iobase); - break; - case IOAT_VER_3_0: - device->dma = ioat_dma_probe(pdev, iobase); - if (device->dma && ioat_dca_enabled) - device->dca = ioat3_dca_init(pdev, iobase); - break; - default: - err = -ENODEV; - break; - } - if (!device->dma) - err = -ENODEV; - - if (err) - goto err_version; - - return 0; - -err_version: - kfree(device); -err_kzalloc: - iounmap(iobase); -err_ioremap: -err_set_dma_mask: - pci_release_regions(pdev); - pci_disable_device(pdev); -err_request_regions: -err_enable_device: - return err; -} - -static void __devexit ioat_remove(struct pci_dev *pdev) -{ - struct ioat_device *device = pci_get_drvdata(pdev); - - dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca); - free_dca_provider(device->dca); - device->dca = NULL; - } - - if (device->dma) { - ioat_dma_remove(device->dma); - device->dma = NULL; - } - - kfree(device); -} - -static int __init ioat_init_module(void) -{ - return pci_register_driver(&ioat_pci_driver); -} -module_init(ioat_init_module); - -static void __exit ioat_exit_module(void) -{ - pci_unregister_driver(&ioat_pci_driver); -} -module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile new file mode 100644 index 00000000000..8997d3fb905 --- /dev/null +++ b/drivers/dma/ioat/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o +ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat/dca.c index c012a1e1504..69d02615c4d 100644 --- a/drivers/dma/ioat_dca.c +++ b/drivers/dma/ioat/dca.c @@ -33,8 +33,8 @@ #define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) #endif -#include "ioatdma.h" -#include "ioatdma_registers.h" +#include "dma.h" +#include "registers.h" /* * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 @@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = { }; -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) return slots; } -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) return slots; } -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider * __devinit +ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c new file mode 100644 index 00000000000..c524d36d3c2 --- /dev/null +++ b/drivers/dma/ioat/dma.c @@ -0,0 +1,1238 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/i7300_idle.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); + +/* internal functions */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat); +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); + +/** + * ioat_dma_do_interrupt - handler used for single vector interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +{ + struct ioatdma_device *instance = data; + struct ioat_chan_common *chan; + unsigned long attnstatus; + int bit; + u8 intrctrl; + + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); + + if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) + return IRQ_NONE; + + if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_NONE; + } + + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); + for_each_bit(bit, &attnstatus, BITS_PER_LONG) { + chan = ioat_chan_by_index(instance, bit); + tasklet_schedule(&chan->cleanup_task); + } + + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_HANDLED; +} + +/** + * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +{ + struct ioat_chan_common *chan = data; + + tasklet_schedule(&chan->cleanup_task); + + return IRQ_HANDLED; +} + +static void ioat1_cleanup_tasklet(unsigned long data); + +/* common channel initialization */ +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + void (*timer_fn)(unsigned long), + void (*tasklet)(unsigned long), + unsigned long ioat) +{ + struct dma_device *dma = &device->common; + + chan->device = device; + chan->reg_base = device->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&chan->cleanup_lock); + chan->common.device = dma; + list_add_tail(&chan->common.device_node, &dma->channels); + device->idx[idx] = chan; + init_timer(&chan->timer); + chan->timer.function = timer_fn; + chan->timer.data = ioat; + tasklet_init(&chan->cleanup_task, tasklet, ioat); + tasklet_disable(&chan->cleanup_task); +} + +static void ioat1_timer_event(unsigned long data); + +/** + * ioat1_dma_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +static int ioat1_enumerate_channels(struct ioatdma_device *device) +{ + u8 xfercap_scale; + u32 xfercap; + int i; + struct ioat_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } + xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_scale &= 0x1f; /* bits [4:0] valid */ + xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); + dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); + +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i, + ioat1_timer_event, + ioat1_cleanup_tasklet, + (unsigned long) ioat); + ioat->xfercap = xfercap; + spin_lock_init(&ioat->desc_lock); + INIT_LIST_HEAD(&ioat->free_desc); + INIT_LIST_HEAD(&ioat->used_desc); + } + dma->chancnt = i; + return i; +} + +/** + * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended + * descriptors to hw + * @chan: DMA channel handle + */ +static inline void +__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) +{ + void __iomem *reg_base = ioat->base.reg_base; + + dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", + __func__, ioat->pending); + ioat->pending = 0; + writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); +} + +static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(chan); + + if (ioat->pending > 0) { + spin_lock_bh(&ioat->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); + } +} + +/** + * ioat1_reset_channel - restart a channel + * @ioat: IOAT DMA channel handle + */ +static void ioat1_reset_channel(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + void __iomem *reg_base = chan->reg_base; + u32 chansts, chanerr; + + dev_warn(to_dev(chan), "reset\n"); + chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); + chansts = *chan->completion & IOAT_CHANSTS_STATUS; + if (chanerr) { + dev_err(to_dev(chan), + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(chan), chansts, chanerr); + writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); + } + + /* + * whack it upside the head with a reset + * and wait for things to settle out. + * force the pending count to a really big negative + * to make sure no one forces an issue_pending + * while we're waiting. + */ + + ioat->pending = INT_MIN; + writeb(IOAT_CHANCMD_RESET, + reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + set_bit(IOAT_RESET_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + RESET_DELAY); +} + +static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *first; + struct ioat_desc_sw *chain_tail; + dma_cookie_t cookie; + + spin_lock_bh(&ioat->desc_lock); + /* cookie incr and addition to used_list must be atomic */ + cookie = c->cookie; + cookie++; + if (cookie < 0) + cookie = 1; + c->cookie = cookie; + tx->cookie = cookie; + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + + /* write address into NextDescriptor field of last desc in chain */ + first = to_ioat_desc(desc->tx_list.next); + chain_tail = to_ioat_desc(ioat->used_desc.prev); + /* make descriptor updates globally visible before chaining */ + wmb(); + chain_tail->hw->next = first->txd.phys; + list_splice_tail_init(&desc->tx_list, &ioat->used_desc); + dump_desc_dbg(ioat, chain_tail); + dump_desc_dbg(ioat, first); + + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + ioat->active += desc->hw->tx_cnt; + ioat->pending += desc->hw->tx_cnt; + if (ioat->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat); + spin_unlock_bh(&ioat->desc_lock); + + return cookie; +} + +/** + * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair + * @ioat: the channel supplying the memory pool for the descriptors + * @flags: allocation flags + */ +static struct ioat_desc_sw * +ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) +{ + struct ioat_dma_descriptor *desc; + struct ioat_desc_sw *desc_sw; + struct ioatdma_device *ioatdma_device; + dma_addr_t phys; + + ioatdma_device = ioat->base.device; + desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); + if (unlikely(!desc)) + return NULL; + + desc_sw = kzalloc(sizeof(*desc_sw), flags); + if (unlikely(!desc_sw)) { + pci_pool_free(ioatdma_device->dma_pool, desc, phys); + return NULL; + } + + memset(desc, 0, sizeof(*desc)); + + INIT_LIST_HEAD(&desc_sw->tx_list); + dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); + desc_sw->txd.tx_submit = ioat1_tx_submit; + desc_sw->hw = desc; + desc_sw->txd.phys = phys; + set_desc_id(desc_sw, -1); + + return desc_sw; +} + +static int ioat_initial_desc_count = 256; +module_param(ioat_initial_desc_count, int, 0644); +MODULE_PARM_DESC(ioat_initial_desc_count, + "ioat1: initial descriptors per channel (default: 256)"); +/** + * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors + * @chan: the channel to be filled out + */ +static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *desc; + u32 chanerr; + int i; + LIST_HEAD(tmp_list); + + /* have we already been set up? */ + if (!list_empty(&ioat->free_desc)) + return ioat->desccount; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* Allocate descriptors */ + for (i = 0; i < ioat_initial_desc_count; i++) { + desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); + if (!desc) { + dev_err(to_dev(chan), "Only %d initial descriptors\n", i); + break; + } + set_desc_id(desc, i); + list_add_tail(&desc->node, &tmp_list); + } + spin_lock_bh(&ioat->desc_lock); + ioat->desccount = i; + list_splice(&tmp_list, &ioat->free_desc); + spin_unlock_bh(&ioat->desc_lock); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_dma) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + tasklet_enable(&chan->cleanup_task); + ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, ioat->desccount); + return ioat->desccount; +} + +/** + * ioat1_dma_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat1_dma_free_chan_resources(struct dma_chan *c) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *ioatdma_device = chan->device; + struct ioat_desc_sw *desc, *_desc; + int in_use_descs = 0; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (ioat->desccount == 0) + return; + + tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); + ioat1_cleanup(ioat); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat->desc_lock); + list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { + dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", + __func__, desc_id(desc)); + dump_desc_dbg(ioat, desc); + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat->free_desc, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->txd.phys); + kfree(desc); + } + spin_unlock_bh(&ioat->desc_lock); + + pci_pool_free(ioatdma_device->completion_pool, + chan->completion, + chan->completion_dma); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + in_use_descs - 1); + + chan->last_completion = 0; + chan->completion_dma = 0; + ioat->pending = 0; + ioat->desccount = 0; +} + +/** + * ioat1_dma_get_next_descriptor - return the next available descriptor + * @ioat: IOAT DMA channel handle + * + * Gets the next descriptor from the chain, and must be called with the + * channel's desc_lock held. Allocates more descriptors if the channel + * has run out. + */ +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) +{ + struct ioat_desc_sw *new; + + if (!list_empty(&ioat->free_desc)) { + new = to_ioat_desc(ioat->free_desc.next); + list_del(&new->node); + } else { + /* try to get another desc */ + new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); + if (!new) { + dev_err(to_dev(&ioat->base), "alloc failed\n"); + return NULL; + } + } + dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", + __func__, desc_id(new)); + prefetch(new->hw); + return new; +} + +static struct dma_async_tx_descriptor * +ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioat_desc_sw *desc; + size_t copy; + LIST_HEAD(chain); + dma_addr_t src = dma_src; + dma_addr_t dest = dma_dest; + size_t total_len = len; + struct ioat_dma_descriptor *hw = NULL; + int tx_cnt = 0; + + spin_lock_bh(&ioat->desc_lock); + desc = ioat1_dma_get_next_descriptor(ioat); + do { + if (!desc) + break; + + tx_cnt++; + copy = min_t(size_t, len, ioat->xfercap); + + hw = desc->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dest; + + list_add_tail(&desc->node, &chain); + + len -= copy; + dest += copy; + src += copy; + if (len) { + struct ioat_desc_sw *next; + + async_tx_ack(&desc->txd); + next = ioat1_dma_get_next_descriptor(ioat); + hw->next = next ? next->txd.phys : 0; + dump_desc_dbg(ioat, desc); + desc = next; + } else + hw->next = 0; + } while (len); + + if (!desc) { + struct ioat_chan_common *chan = &ioat->base; + + dev_err(to_dev(chan), + "chan%d - get_next_desc failed\n", chan_num(chan)); + list_splice(&chain, &ioat->free_desc); + spin_unlock_bh(&ioat->desc_lock); + return NULL; + } + spin_unlock_bh(&ioat->desc_lock); + + desc->txd.flags = flags; + desc->len = total_len; + list_splice(&chain, &desc->tx_list); + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->tx_cnt = tx_cnt; + dump_desc_dbg(ioat, desc); + + return &desc->txd; +} + +static void ioat1_cleanup_tasklet(unsigned long data) +{ + struct ioat_dma_chan *chan = (void *)data; + + ioat1_cleanup(chan); + writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw) +{ + struct pci_dev *pdev = chan->device->pdev; + size_t offset = len - hw->size; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) + ioat_unmap(pdev, hw->src_addr - offset, len, + PCI_DMA_TODEVICE, flags, 0); +} + +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) +{ + unsigned long phys_complete; + u64 completion; + + completion = *chan->completion; + phys_complete = ioat_chansts_to_addr(completion); + + dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + (unsigned long long) phys_complete); + + if (is_ioat_halted(completion)) { + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", + chanerr); + + /* TODO do something to salvage the situation */ + } + + return phys_complete; +} + +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + unsigned long *phys_complete) +{ + *phys_complete = ioat_get_current_completion(chan); + if (*phys_complete == chan->last_completion) + return false; + clear_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + return true; +} + +static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct list_head *_desc, *n; + struct dma_async_tx_descriptor *tx; + + dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n", + __func__, phys_complete); + list_for_each_safe(_desc, n, &ioat->used_desc) { + struct ioat_desc_sw *desc; + + prefetch(n); + desc = list_entry(_desc, typeof(*desc), node); + tx = &desc->txd; + /* + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. + */ + dump_desc_dbg(ioat, desc); + if (tx->cookie) { + chan->completed_cookie = tx->cookie; + tx->cookie = 0; + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + ioat->active -= desc->hw->tx_cnt; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (async_tx_test_ack(tx)) + list_move_tail(&desc->node, &ioat->free_desc); + } else { + /* + * last used desc. Do not remove, so we can + * append from it. + */ + + /* if nothing else is pending, cancel the + * completion timeout + */ + if (n == &ioat->used_desc) { + dev_dbg(to_dev(chan), + "%s cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + } + + /* TODO check status bits? */ + break; + } + } + + chan->last_completion = phys_complete; +} + +/** + * ioat1_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + * + * To prevent lock contention we defer cleanup when the locks are + * contended with a terminal timeout that forces cleanup and catches + * completion notification errors. + */ +static void ioat1_cleanup(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->desc_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat1_timer_event(unsigned long data) +{ + struct ioat_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + + spin_lock_bh(&chan->cleanup_lock); + if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat->desc_lock); + + /* restart active descriptors */ + desc = to_ioat_desc(ioat->used_desc.prev); + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); + + ioat->pending = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&ioat->desc_lock); + } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + + spin_lock_bh(&ioat->desc_lock); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat1_reset_channel(ioat); + else { + u64 status = ioat_chansts(chan); + + /* manually update the last completion address */ + if (ioat_chansts_to_addr(status) != 0) + *chan->completion = status; + + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->desc_lock); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +static enum dma_status +ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat1_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_desc_sw *desc; + struct ioat_dma_descriptor *hw; + + spin_lock_bh(&ioat->desc_lock); + + desc = ioat1_dma_get_next_descriptor(ioat); + + if (!desc) { + dev_err(to_dev(chan), + "Unable to start null desc - get next desc failed\n"); + spin_unlock_bh(&ioat->desc_lock); + return; + } + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + hw->next = 0; + list_add_tail(&desc->node, &ioat->used_desc); + dump_desc_dbg(ioat, desc); + + ioat_set_chainaddr(ioat, desc->txd.phys); + ioat_start(chan); + spin_unlock_bh(&ioat->desc_lock); +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void __devinit ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @device: device to be tested + */ +int __devinit ioat_dma_self_test(struct ioatdma_device *device) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &device->common; + struct device *dev = &device->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE | + DMA_PREP_INTERRUPT; + tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, + IOAT_TEST_SIZE, flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) + != DMA_SUCCESS) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), " + "msix-single-vector, msi, intx)"); + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @device: ioat device + */ +static int ioat_dma_setup_interrupts(struct ioatdma_device *device) +{ + struct ioat_chan_common *chan; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msix-single-vector")) + goto msix_single_vector; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = device->common.chancnt; + for (i = 0; i < msixcnt; i++) + device->msix_entries[i].entry = i; + + err = pci_enable_msix(pdev, device->msix_entries, msixcnt); + if (err < 0) + goto msi; + if (err > 0) + goto msix_single_vector; + + for (i = 0; i < msixcnt; i++) { + msix = &device->msix_entries[i]; + chan = ioat_chan_by_index(device, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &device->msix_entries[j]; + chan = ioat_chan_by_index(device, j); + devm_free_irq(dev, msix->vector, chan); + } + goto msix_single_vector; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + goto done; + +msix_single_vector: + msix = &device->msix_entries[0]; + msix->entry = 0; + err = pci_enable_msix(pdev, device->msix_entries, 1); + if (err) + goto msi; + + err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, + "ioat-msix", device); + if (err) { + pci_disable_msix(pdev); + goto msi; + } + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", device); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", device); + if (err) + goto err_no_irq; + +done: + if (device->intr_quirk) + device->intr_quirk(device); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *device) +{ + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); +} + +int __devinit ioat_probe(struct ioatdma_device *device) +{ + int err = -ENODEV; + struct dma_device *dma = &device->common; + struct pci_dev *pdev = device->pdev; + struct device *dev = &pdev->dev; + + /* DMA coherent memory pool for DMA descriptor allocations */ + device->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!device->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + device->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!device->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + device->enumerate_channels(device); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!dma->chancnt) { + dev_err(dev, "zero channels detected\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(device); + if (err) + goto err_setup_interrupts; + + err = device->self_test(device); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(device); +err_setup_interrupts: + pci_pool_destroy(device->completion_pool); +err_completion_pool: + pci_pool_destroy(device->dma_pool); +err_dma_pool: + return err; +} + +int __devinit ioat_register(struct ioatdma_device *device) +{ + int err = dma_async_device_register(&device->common); + + if (err) { + ioat_disable_interrupts(device); + pci_pool_destroy(device->completion_pool); + pci_pool_destroy(device->dma_pool); + } + + return err; +} + +/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ +static void ioat1_intr_quirk(struct ioatdma_device *device) +{ + struct pci_dev *pdev = device->pdev; + u32 dmactrl; + + pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + if (pdev->msi_enabled) + dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; + else + dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; + pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->desccount); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat_dma_chan *ioat = to_ioat_chan(c); + + return sprintf(page, "%d\n", ioat->active); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *device = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + device->version >> 4, device->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static struct attribute *ioat1_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioat_chan_common *chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + chan = container_of(kobj, struct ioat_chan_common, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&chan->common, page); +} + +struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +static struct kobj_type ioat1_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat1_attrs, +}; + +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (err) { + dev_warn(to_dev(chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioat_chan_common *chan = to_chan_common(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { + kobject_del(&chan->kobj); + kobject_put(&chan->kobj); + } + } +} + +int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + int err; + + device->intr_quirk = ioat1_intr_quirk; + device->enumerate_channels = ioat1_enumerate_channels; + device->self_test = ioat_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; + dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; + dma->device_free_chan_resources = ioat1_dma_free_chan_resources; + dma->device_is_tx_complete = ioat1_dma_is_complete; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(4096); + err = ioat_register(device); + if (err) + return err; + ioat_kobject_add(device, &ioat1_ktype); + + if (dca) + device->dca = ioat_dca_init(pdev, device->reg_base); + + return err; +} + +void __devexit ioat_dma_remove(struct ioatdma_device *device) +{ + struct dma_device *dma = &device->common; + + ioat_disable_interrupts(device); + + ioat_kobject_del(device); + + dma_async_device_unregister(dma); + + pci_pool_destroy(device->dma_pool); + pci_pool_destroy(device->completion_pool); + + INIT_LIST_HEAD(&dma->channels); +} diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h new file mode 100644 index 00000000000..c14fdfeb7f3 --- /dev/null +++ b/drivers/dma/ioat/dma.h @@ -0,0 +1,337 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_H +#define IOATDMA_H + +#include <linux/dmaengine.h> +#include "hw.h" +#include "registers.h" +#include <linux/init.h> +#include <linux/dmapool.h> +#include <linux/cache.h> +#include <linux/pci_ids.h> +#include <net/tcp.h> + +#define IOAT_DMA_VERSION "4.00" + +#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 +#define IOAT_DMA_DCA_ANY_CPU ~0 + +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) +#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) +#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +/** + * struct ioatdma_device - internal representation of a IOAT device + * @pdev: PCI-Express device + * @reg_base: MMIO register space base address + * @dma_pool: for allocating DMA descriptors + * @common: embedded struct dma_device + * @version: version of ioatdma device + * @msix_entries: irq handlers + * @idx: per channel data + * @dca: direct cache access context + * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) + * @enumerate_channels: hw version specific channel enumeration + * @cleanup_tasklet: select between the v2 and v3 cleanup routines + * @timer_fn: select between the v2 and v3 timer watchdog routines + * @self_test: hardware version specific self test for each supported op type + * + * Note: the v3 cleanup routine supports raid operations + */ +struct ioatdma_device { + struct pci_dev *pdev; + void __iomem *reg_base; + struct pci_pool *dma_pool; + struct pci_pool *completion_pool; + struct dma_device common; + u8 version; + struct msix_entry msix_entries[4]; + struct ioat_chan_common *idx[4]; + struct dca_provider *dca; + void (*intr_quirk)(struct ioatdma_device *device); + int (*enumerate_channels)(struct ioatdma_device *device); + void (*cleanup_tasklet)(unsigned long data); + void (*timer_fn)(unsigned long data); + int (*self_test)(struct ioatdma_device *device); +}; + +struct ioat_chan_common { + struct dma_chan common; + void __iomem *reg_base; + unsigned long last_completion; + spinlock_t cleanup_lock; + dma_cookie_t completed_cookie; + unsigned long state; + #define IOAT_COMPLETION_PENDING 0 + #define IOAT_COMPLETION_ACK 1 + #define IOAT_RESET_PENDING 2 + #define IOAT_KOBJ_INIT_FAIL 3 + struct timer_list timer; + #define COMPLETION_TIMEOUT msecs_to_jiffies(100) + #define IDLE_TIMEOUT msecs_to_jiffies(2000) + #define RESET_DELAY msecs_to_jiffies(100) + struct ioatdma_device *device; + dma_addr_t completion_dma; + u64 *completion; + struct tasklet_struct cleanup_task; + struct kobject kobj; +}; + +struct ioat_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct dma_chan *, char *); +}; + +/** + * struct ioat_dma_chan - internal representation of a DMA channel + */ +struct ioat_dma_chan { + struct ioat_chan_common base; + + size_t xfercap; /* XFERCAP register value expanded out */ + + spinlock_t desc_lock; + struct list_head free_desc; + struct list_head used_desc; + + int pending; + u16 desccount; + u16 active; +}; + +static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) +{ + return container_of(c, struct ioat_chan_common, common); +} + +static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat_dma_chan, base); +} + +/** + * ioat_is_complete - poll the status of an ioat transaction + * @c: channel handle + * @cookie: transaction identifier + * @done: if set, updated with last completed transaction + * @used: if set, updated with last used transaction + */ +static inline enum dma_status +ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat_chan_common *chan = to_chan_common(c); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + last_used = c->cookie; + last_complete = chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +/* wrapper around hardware descriptor format + additional software fields */ + +/** + * struct ioat_desc_sw - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @node: this descriptor will either be on the free list, + * or attached to a transaction list (tx_list) + * @txd: the generic software descriptor for all engines + * @id: identifier for debug + */ +struct ioat_desc_sw { + struct ioat_dma_descriptor *hw; + struct list_head node; + size_t len; + struct list_head tx_list; + struct dma_async_tx_descriptor txd; + #ifdef DEBUG + int id; + #endif +}; + +#ifdef DEBUG +#define set_desc_id(desc, i) ((desc)->id = (i)) +#define desc_id(desc) ((desc)->id) +#else +#define set_desc_id(desc, i) +#define desc_id(desc) (0) +#endif + +static inline void +__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, + struct dma_async_tx_descriptor *tx, int id) +{ + struct device *dev = to_dev(chan); + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" + " ctl: %#x (op: %d int_en: %d compl: %d)\n", id, + (unsigned long long) tx->phys, + (unsigned long long) hw->next, tx->cookie, tx->flags, + hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); +} + +#define dump_desc_dbg(c, d) \ + ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + +static inline void ioat_set_tcp_copy_break(unsigned long copybreak) +{ + #ifdef CONFIG_NET_DMA + sysctl_tcp_dma_copybreak = copybreak; + #endif +} + +static inline struct ioat_chan_common * +ioat_chan_by_index(struct ioatdma_device *device, int index) +{ + return device->idx[index]; +} + +static inline u64 ioat_chansts(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u64 status; + u32 status_lo; + + /* We need to read the low address first as this causes the + * chipset to latch the upper bits for the subsequent read + */ + status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status <<= 32; + status |= status_lo; + + return status; +} + +static inline void ioat_start(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline u64 ioat_chansts_to_addr(u64 status) +{ + return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +} + +static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +{ + return readl(chan->reg_base + IOAT_CHANERR_OFFSET); +} + +static inline void ioat_suspend(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); +} + +static inline bool is_ioat_active(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); +} + +static inline bool is_ioat_idle(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE); +} + +static inline bool is_ioat_halted(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED); +} + +static inline bool is_ioat_suspended(unsigned long status) +{ + return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED); +} + +/* channel was fatally programmed */ +static inline bool is_ioat_bug(unsigned long err) +{ + return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR| + IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR| + IOAT_CHANERR_LENGTH_ERR)); +} + +static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, + int direction, enum dma_ctrl_flags flags, bool dst) +{ + if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || + (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) + pci_unmap_single(pdev, addr, len, direction); + else + pci_unmap_page(pdev, addr, len, direction); +} + +int __devinit ioat_probe(struct ioatdma_device *device); +int __devinit ioat_register(struct ioatdma_device *device); +int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat_dma_self_test(struct ioatdma_device *device); +void __devexit ioat_dma_remove(struct ioatdma_device *device); +struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, + void __iomem *iobase); +unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); +void ioat_init_channel(struct ioatdma_device *device, + struct ioat_chan_common *chan, int idx, + void (*timer_fn)(unsigned long), + void (*tasklet)(unsigned long), + unsigned long ioat); +void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, + size_t len, struct ioat_dma_descriptor *hw); +bool ioat_cleanup_preamble(struct ioat_chan_common *chan, + unsigned long *phys_complete); +void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *device); +extern struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c new file mode 100644 index 00000000000..96ffab7d37a --- /dev/null +++ b/drivers/dma/ioat/dma_v2.c @@ -0,0 +1,871 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine (versions >= 2), which + * does asynchronous data movement and checksumming operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/i7300_idle.h> +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat2+: allocate 2^n descriptors per channel" + " (default: 8 max: 16)"); +static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat2+: upper limit for ring size (default: 16)"); + +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) +{ + void * __iomem reg_base = ioat->base.reg_base; + + ioat->pending = 0; + ioat->dmacount += ioat2_ring_pending(ioat); + ioat->issued = ioat->head; + /* make descriptor updates globally visible before notifying channel */ + wmb(); + writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(&ioat->base), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); +} + +void ioat2_issue_pending(struct dma_chan *chan) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); + + spin_lock_bh(&ioat->ring_lock); + if (ioat->pending == 1) + __ioat2_issue_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +/** + * ioat2_update_pending - log pending descriptors + * @ioat: ioat2+ channel + * + * set pending to '1' unless pending is already set to '2', pending == 2 + * indicates that submission is temporarily blocked due to an in-flight + * reset. If we are already above the ioat_pending_level threshold then + * just issue pending. + * + * called with ring_lock held + */ +static void ioat2_update_pending(struct ioat2_dma_chan *ioat) +{ + if (unlikely(ioat->pending == 2)) + return; + else if (ioat2_ring_pending(ioat) > ioat_pending_level) + __ioat2_issue_pending(ioat); + else + ioat->pending = 1; +} + +static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + int idx; + + if (ioat2_ring_space(ioat) < 1) { + dev_err(to_dev(&ioat->base), + "Unable to start null desc - ring full\n"); + return; + } + + dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + idx = ioat2_desc_alloc(ioat, 1); + desc = ioat2_get_ring_ent(ioat, idx); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat2_set_chainaddr(ioat, desc->txd.phys); + dump_desc_dbg(ioat, desc); + __ioat2_issue_pending(ioat); +} + +static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) +{ + spin_lock_bh(&ioat->ring_lock); + __ioat2_start_null_desc(ioat); + spin_unlock_bh(&ioat->ring_lock); +} + +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct dma_async_tx_descriptor *tx; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + tx = &desc->txd; + dump_desc_dbg(ioat, desc); + if (tx->cookie) { + ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + chan->completed_cookie = tx->cookie; + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + + chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +/** + * ioat2_cleanup - clean finished descriptors (advance tail pointer) + * @chan: ioat channel to be cleaned up + */ +static void ioat2_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +void ioat2_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat2_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + + /* set the tail to be re-issued */ + ioat->issued = ioat->tail; + ioat->dmacount = 0; + set_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); + + if (ioat2_ring_pending(ioat)) { + struct ioat_ring_ent *desc; + + desc = ioat2_get_ring_ent(ioat, ioat->tail); + ioat2_set_chainaddr(ioat, desc->txd.phys); + __ioat2_issue_pending(ioat); + } else + __ioat2_start_null_desc(ioat); +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + status = ioat_chansts(chan); + cpu_relax(); + } + + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +void ioat2_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; + + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + BUG_ON(is_ioat_bug(chanerr)); + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat2_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +/** + * ioat2_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +int ioat2_enumerate_channels(struct ioatdma_device *device) +{ + struct ioat2_dma_chan *ioat; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(device->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(device->idx)); + dma->chancnt = ARRAY_SIZE(device->idx); + } + xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + /* FIXME which i/oat version is i7300? */ +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) + dma->chancnt--; +#endif + for (i = 0; i < dma->chancnt; i++) { + ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); + if (!ioat) + break; + + ioat_init_channel(device, &ioat->base, i, + device->timer_fn, + device->cleanup_tasklet, + (unsigned long) ioat); + ioat->xfercap_log = xfercap_log; + spin_lock_init(&ioat->ring_lock); + } + dma->chancnt = i; + return i; +} + +static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) +{ + struct dma_chan *c = tx->chan; + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + dma_cookie_t cookie = c->cookie; + + cookie++; + if (cookie < 0) + cookie = 1; + tx->cookie = cookie; + c->cookie = cookie; + dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat2_update_pending(ioat); + spin_unlock_bh(&ioat->ring_lock); + + return cookie; +} + +static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) +{ + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *dma; + dma_addr_t phys; + + dma = to_ioatdma_device(chan->device); + hw = pci_pool_alloc(dma->dma_pool, flags, &phys); + if (!hw) + return NULL; + memset(hw, 0, sizeof(*hw)); + + desc = kmem_cache_alloc(ioat2_cache, flags); + if (!desc) { + pci_pool_free(dma->dma_pool, hw, phys); + return NULL; + } + memset(desc, 0, sizeof(*desc)); + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat2_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} + +static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *dma; + + dma = to_ioatdma_device(chan->device); + pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); + kmem_cache_free(ioat2_cache, desc); +} + +static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) +{ + struct ioat_ring_ent **ring; + int descs = 1 << order; + int i; + + if (order > ioat_get_max_alloc_order()) + return NULL; + + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat2_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat2_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); + } + + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; + } + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; +} + +/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring + * @chan: channel to be initialized + */ +int ioat2_alloc_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent **ring; + u32 chanerr; + int order; + + /* have we already been set up? */ + if (ioat->ring) + return 1 << ioat->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + chan->completion = pci_pool_alloc(chan->device->completion_pool, + GFP_KERNEL, &chan->completion_dma); + if (!chan->completion) + return -ENOMEM; + + memset(chan->completion, 0, sizeof(*chan->completion)); + writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, + chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) chan->completion_dma) >> 32, + chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = ioat_get_alloc_order(); + ring = ioat2_alloc_ring(c, order, GFP_KERNEL); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&ioat->ring_lock); + ioat->ring = ring; + ioat->head = 0; + ioat->issued = 0; + ioat->tail = 0; + ioat->pending = 0; + ioat->alloc_order = order; + spin_unlock_bh(&ioat->ring_lock); + + tasklet_enable(&chan->cleanup_task); + ioat2_start_null_desc(ioat); + + return 1 << ioat->alloc_order; +} + +bool reshape_ring(struct ioat2_dma_chan *ioat, int order) +{ + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct ioat_chan_common *chan = &ioat->base; + struct dma_chan *c = &chan->common; + const u16 curr_size = ioat2_ring_mask(ioat) + 1; + const u16 active = ioat2_ring_active(ioat); + const u16 new_size = 1 << order; + struct ioat_ring_ent **ring; + u16 i; + + if (order > ioat_get_max_alloc_order()) + return false; + + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring + */ + if (active >= new_size) + return false; + + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; + + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ioat2_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } + + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat->tail+i) & (new_size-1); + struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; + + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat->tail+i) & (curr_size-1); + u16 new_idx = (ioat->tail+i) & (new_size-1); + + ring[new_idx] = ioat->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat2_get_ring_ent(ioat, ioat->tail+i); + ioat2_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; + } + + dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", + __func__, new_size); + + kfree(ioat->ring); + ioat->ring = ring; + ioat->alloc_order = order; + + return true; +} + +/** + * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops + * @idx: gets starting descriptor index on successful allocation + * @ioat: ioat2,3 channel (ring) to operate on + * @num_descs: allocation length + */ +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) +{ + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&ioat->ring_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. + */ + while (unlikely(ioat2_ring_space(ioat) <= num_descs)) { + if (reshape_ring(ioat, ioat->alloc_order + 1) && + ioat2_ring_space(ioat) > num_descs) + break; + + if (printk_ratelimit()) + dev_dbg(to_dev(chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, + ioat->issued); + spin_unlock_bh(&ioat->ring_lock); + + /* progress reclaim in the allocation failure case we + * may be called under bh_disabled so we need to trigger + * the timer event directly + */ + spin_lock_bh(&chan->cleanup_lock); + if (jiffies > chan->timer.expires && + timer_pending(&chan->timer)) { + struct ioatdma_device *device = chan->device; + + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + spin_unlock_bh(&chan->cleanup_lock); + device->timer_fn((unsigned long) ioat); + } else + spin_unlock_bh(&chan->cleanup_lock); + return -ENOMEM; + } + + dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat->head, ioat->tail, ioat->issued); + + *idx = ioat2_desc_alloc(ioat, num_descs); + return 0; /* with ioat->ring_lock held */ +} + +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs; + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + +/** + * ioat2_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +void ioat2_free_chan_resources(struct dma_chan *c) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioatdma_device *device = chan->device; + struct ioat_ring_ent *desc; + const u16 total_descs = 1 << ioat->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat->ring) + return; + + tasklet_disable(&chan->cleanup_task); + del_timer_sync(&chan->timer); + device->cleanup_tasklet((unsigned long) ioat); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat->ring_lock); + descs = ioat2_ring_space(ioat); + dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->head + i); + ioat2_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); + ioat2_free_ring_ent(desc, c); + } + + kfree(ioat->ring); + ioat->ring = NULL; + ioat->alloc_order = 0; + pci_pool_free(device->completion_pool, chan->completion, + chan->completion_dma); + spin_unlock_bh(&ioat->ring_lock); + + chan->last_completion = 0; + chan->completion_dma = 0; + ioat->pending = 0; + ioat->dmacount = 0; +} + +enum dma_status +ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioatdma_device *device = ioat->base.device; + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + device->cleanup_tasklet((unsigned long) ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat2_ring_active(ioat)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat2_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat2_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat2_attrs, +}; + +int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + int err; + + device->enumerate_channels = ioat2_enumerate_channels; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; + device->self_test = ioat_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + dma->device_is_tx_complete = ioat2_is_complete; + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(2048); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + + ioat_kobject_add(device, &ioat2_ktype); + + if (dca) + device->dca = ioat2_dca_init(pdev, device->reg_base); + + return err; +} diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h new file mode 100644 index 00000000000..1d849ef74d5 --- /dev/null +++ b/drivers/dma/ioat/dma_v2.h @@ -0,0 +1,190 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_V2_H +#define IOATDMA_V2_H + +#include <linux/dmaengine.h> +#include "dma.h" +#include "hw.h" + + +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) + +/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes + * @base: common ioat channel parameters + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @pending: lock free indicator for issued != head + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @ring: software ring buffer implementation of hardware ring + * @ring_lock: protects ring attributes + */ +struct ioat2_dma_chan { + struct ioat_chan_common base; + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + int pending; + struct ioat_ring_ent **ring; + spinlock_t ring_lock; +}; + +static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) +{ + struct ioat_chan_common *chan = to_chan_common(c); + + return container_of(chan, struct ioat2_dma_chan, base); +} + +static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat) +{ + return (1 << ioat->alloc_order) - 1; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) +{ + return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat); +} + +static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat) +{ + u16 num_descs = ioat2_ring_mask(ioat) + 1; + u16 active = ioat2_ring_active(ioat); + + BUG_ON(active > num_descs); + + return num_descs - active; +} + +/* assumes caller already checked space */ +static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len) +{ + ioat->head += len; + return ioat->head - len; +} + +static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) +{ + u16 num_descs = len >> ioat->xfercap_log; + + num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); + return num_descs; +} + +/** + * struct ioat_ring_ent - wrapper around hardware descriptor + * @hw: hardware DMA descriptor (for memcpy) + * @fill: hardware fill descriptor + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor + * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations + * @id: identifier for debug + */ + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_fill_descriptor *fill; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; + size_t len; + struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; + #ifdef DEBUG + int id; + #endif +}; + +static inline struct ioat_ring_ent * +ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) +{ + return ioat->ring[idx & ioat2_ring_mask(ioat)]; +} + +static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) +{ + struct ioat_chan_common *chan = &ioat->base; + + writel(addr & 0x00000000FFFFFFFF, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca); +int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca); +struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs); +int ioat2_enumerate_channels(struct ioatdma_device *device); +struct dma_async_tx_descriptor * +ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +void ioat2_issue_pending(struct dma_chan *chan); +int ioat2_alloc_chan_resources(struct dma_chan *c); +void ioat2_free_chan_resources(struct dma_chan *c); +enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used); +void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); +bool reshape_ring(struct ioat2_dma_chan *ioat, int order); +void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); +void ioat2_cleanup_tasklet(unsigned long data); +void ioat2_timer_event(unsigned long data); +extern struct kobj_type ioat2_ktype; +extern struct kmem_cache *ioat2_cache; +#endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c new file mode 100644 index 00000000000..35d1e33afd5 --- /dev/null +++ b/drivers/dma/ioat/dma_v3.c @@ -0,0 +1,1223 @@ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * BSD LICENSE + * + * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Support routines for v3+ hardware + */ + +#include <linux/pci.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include "registers.h" +#include "hw.h" +#include "dma.h" +#include "dma_v2.h" + +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc __read_mostly = 0xd0; +static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc __read_mostly = 0xf8; +static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; + +static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + return raw->field[xor_idx_to_field[idx]]; +} + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, + struct ioat_ring_ent *desc, int idx) +{ + struct ioat_chan_common *chan = &ioat->base; + struct pci_dev *pdev = chan->device->pdev; + size_t len = desc->len; + size_t offset = len - desc->hw->size; + struct dma_async_tx_descriptor *tx = &desc->txd; + enum dma_ctrl_flags flags = tx->flags; + + switch (desc->hw->ctl_f.op) { + case IOAT_OP_COPY: + if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ + ioat_dma_unmap(chan, flags, len, desc->hw); + break; + case IOAT_OP_FILL: { + struct ioat_fill_descriptor *hw = desc->fill; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, hw->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + case IOAT_OP_XOR_VAL: + case IOAT_OP_XOR: { + struct ioat_xor_descriptor *xor = desc->xor; + struct ioat_ring_ent *ext; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 5) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + xor_ex = ext->xor_ex; + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = xor_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* dest is a source in xor validate operations */ + if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_TODEVICE, flags, 1); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) + ioat_unmap(pdev, xor->dst_addr - offset, len, + PCI_DMA_FROMDEVICE, flags, 1); + break; + } + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ: { + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_ring_ent *ext; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + struct ioat_raw_descriptor *descs[2]; + int i; + + if (src_cnt > 3) { + ext = ioat2_get_ring_ent(ioat, idx + 1); + pq_ex = ext->pq_ex; + } + + /* in the 'continue' case don't unmap the dests as sources */ + if (dmaf_p_disabled_continue(flags)) + src_cnt--; + else if (dmaf_continue(flags)) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + for (i = 0; i < src_cnt; i++) { + dma_addr_t src = pq_get_src(descs, i); + + ioat_unmap(pdev, src - offset, len, + PCI_DMA_TODEVICE, flags, 0); + } + + /* the dests are sources in pq validate operations */ + if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, + len, PCI_DMA_TODEVICE, flags, 0); + break; + } + } + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (!(flags & DMA_PREP_PQ_DISABLE_P)) + ioat_unmap(pdev, pq->p_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + if (!(flags & DMA_PREP_PQ_DISABLE_Q)) + ioat_unmap(pdev, pq->q_addr - offset, len, + PCI_DMA_BIDIRECTIONAL, flags, 1); + } + break; + } + default: + dev_err(&pdev->dev, "%s: unknown op type: %#x\n", + __func__, desc->hw->ctl_f.op); + } +} + +static bool desc_has_ext(struct ioat_ring_ent *desc) +{ + struct ioat_dma_descriptor *hw = desc->hw; + + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; + + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; + + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; + } + + return false; +} + +/** + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean + * + * The difference from the dma_v2.c __cleanup() is that this routine + * handles extended descriptors and dma-unmapping raid operations. + */ +static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) +{ + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *desc; + bool seen_current = false; + u16 active; + int i; + + dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat->head, ioat->tail, ioat->issued); + + active = ioat2_ring_active(ioat); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; + + prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); + desc = ioat2_get_ring_ent(ioat, ioat->tail + i); + dump_desc_dbg(ioat, desc); + tx = &desc->txd; + if (tx->cookie) { + chan->completed_cookie = tx->cookie; + ioat3_dma_unmap(ioat, desc, ioat->tail + i); + tx->cookie = 0; + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } + + if (tx->phys == phys_complete) + seen_current = true; + + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } + } + ioat->tail += i; + BUG_ON(!seen_current); /* no active descs have written a completion? */ + chan->last_completion = phys_complete; + if (ioat->head == ioat->tail) { + dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", + __func__); + clear_bit(IOAT_COMPLETION_PENDING, &chan->state); + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } +} + +static void ioat3_cleanup(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + prefetch(chan->completion); + + if (!spin_trylock_bh(&chan->cleanup_lock)) + return; + + if (!ioat_cleanup_preamble(chan, &phys_complete)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + if (!spin_trylock_bh(&ioat->ring_lock)) { + spin_unlock_bh(&chan->cleanup_lock); + return; + } + + __cleanup(ioat, phys_complete); + + spin_unlock_bh(&ioat->ring_lock); + spin_unlock_bh(&chan->cleanup_lock); +} + +static void ioat3_cleanup_tasklet(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + + ioat3_cleanup(ioat); + writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN, + ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + u32 status; + + status = ioat_chansts(chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + status = ioat_chansts(chan); + cpu_relax(); + } + + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + + __ioat2_restart_chan(ioat); +} + +static void ioat3_timer_event(unsigned long data) +{ + struct ioat2_dma_chan *ioat = (void *) data; + struct ioat_chan_common *chan = &ioat->base; + + spin_lock_bh(&chan->cleanup_lock); + if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { + unsigned long phys_complete; + u64 status; + + spin_lock_bh(&ioat->ring_lock); + status = ioat_chansts(chan); + + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + BUG_ON(is_ioat_bug(chanerr)); + } + + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + if (ioat_cleanup_preamble(chan, &phys_complete)) + __cleanup(ioat, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) + ioat3_restart_channel(ioat); + else { + set_bit(IOAT_COMPLETION_ACK, &chan->state); + mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + } + spin_unlock_bh(&ioat->ring_lock); + } else { + u16 active; + + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + spin_lock_bh(&ioat->ring_lock); + active = ioat2_ring_active(ioat); + if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) + reshape_ring(ioat, ioat->alloc_order-1); + spin_unlock_bh(&ioat->ring_lock); + + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat->alloc_order > ioat_get_alloc_order()) + mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); + } + spin_unlock_bh(&chan->cleanup_lock); +} + +static enum dma_status +ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + + if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) + return DMA_SUCCESS; + + ioat3_cleanup(ioat); + + return ioat_is_complete(c, cookie, done, used); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_fill_descriptor *fill; + int num_descs; + u64 src_data = (0x0101010101010101ULL) * (value & 0xff); + u16 idx; + int i; + + num_descs = ioat2_xferlen_to_descs(ioat, len); + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + fill = desc->fill; + + fill->size = xfer_size; + fill->src_data = src_data; + fill->dst_addr = dest; + fill->ctl = 0; + fill->ctl_f.op = IOAT_OP_FILL; + + len -= xfer_size; + dest += xfer_size; + dump_desc_dbg(ioat, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + fill->ctl_f.compl_write = 1; + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + int num_descs; + int with_ext; + int i; + u16 idx; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + int s; + + desc = ioat2_get_ring_ent(ioat, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(&ioat->base); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_chan_common *chan = &ioat->base; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + int num_descs; + int with_ext; + int i, s; + u16 idx; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + + dev_dbg(to_dev(chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat2_xferlen_to_descs(ioat, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources + */ + if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. + */ + if (likely(num_descs) && + ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) + /* pass */; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); + + desc = ioat2_get_ring_ent(ioat, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat, desc, ext); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat2_get_ring_ent(ioat, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static struct dma_async_tx_descriptor * +ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + /* handle the single source multiply case from the raid6 + * recovery path + */ + if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + } else + return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, + len, flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + memset(scf, 0, src_cnt); + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[0] = dst; + pq[1] = ~0; + + return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[src_cnt]; + dma_addr_t pq[2]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[0] = src[0]; + pq[1] = ~0; + + return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, + len, flags); +} + +static struct dma_async_tx_descriptor * +ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioat2_dma_chan *ioat = to_ioat2_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + u16 idx; + + if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) + desc = ioat2_get_ring_ent(ioat, idx); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static void __devinit ioat3_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_addr, dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &device->pdev->dev; + struct dma_device *dma = &device->common; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOAT_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT); + + if (!tx) { + dev_err(dev, "Self-test xor prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test xor setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test xor timed out\n"); + err = -ENODEV; + goto free_resources; + } + + dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_err(dev, "Self-test xor failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE); + + /* skip validate if the capability is not present */ + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) + goto free_resources; + + /* validate the sources with the destintation page */ + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + xor_val_srcs[i] = xor_srcs[i]; + xor_val_srcs[i] = dest; + + xor_val_result = 1; + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + /* skip memset if the capability is not present */ + if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) + goto free_resources; + + /* test memset */ + dma_addr = dma_map_page(dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, + DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test memset prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test memset setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test memset timed out\n"); + err = -ENODEV; + goto free_resources; + } + + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { + u32 *ptr = page_address(dest); + if (ptr[i]) { + dev_err(dev, "Self-test memset failed compare\n"); + err = -ENODEV; + goto free_resources; + } + } + + /* test for non-zero parity sum */ + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat3_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto free_resources; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto free_resources; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) +{ + int rc = ioat_dma_self_test(device); + + if (rc) + return rc; + + rc = ioat_xor_val_self_test(device); + if (rc) + return rc; + + return 0; +} + +int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) +{ + struct pci_dev *pdev = device->pdev; + struct dma_device *dma; + struct dma_chan *c; + struct ioat_chan_common *chan; + bool is_raid_device = false; + int err; + u16 dev_id; + u32 cap; + + device->enumerate_channels = ioat2_enumerate_channels; + device->self_test = ioat3_dma_self_test; + dma = &device->common; + dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat2_issue_pending; + dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; + dma->device_free_chan_resources = ioat2_free_chan_resources; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; + + cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); + if (cap & IOAT_CAP_XOR) { + is_raid_device = true; + dma->max_xor = 8; + dma->xor_align = 2; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_xor_val; + } + if (cap & IOAT_CAP_PQ) { + is_raid_device = true; + dma_set_maxpq(dma, 8, 0); + dma->pq_align = 2; + + dma_cap_set(DMA_PQ, dma->cap_mask); + dma->device_prep_dma_pq = ioat3_prep_pq; + + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + dma->device_prep_dma_pq_val = ioat3_prep_pq_val; + + if (!(cap & IOAT_CAP_XOR)) { + dma->max_xor = 8; + dma->xor_align = 2; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat3_prep_pqxor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; + } + } + if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { + dma_cap_set(DMA_MEMSET, dma->cap_mask); + dma->device_prep_dma_memset = ioat3_prep_memset_lock; + } + + + if (is_raid_device) { + dma->device_is_tx_complete = ioat3_is_complete; + device->cleanup_tasklet = ioat3_cleanup_tasklet; + device->timer_fn = ioat3_timer_event; + } else { + dma->device_is_tx_complete = ioat2_is_complete; + device->cleanup_tasklet = ioat2_cleanup_tasklet; + device->timer_fn = ioat2_timer_event; + } + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + err = ioat_probe(device); + if (err) + return err; + ioat_set_tcp_copy_break(262144); + + list_for_each_entry(c, &dma->channels, device_node) { + chan = to_chan_common(c); + writel(IOAT_DMA_DCA_ANY_CPU, + chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(device); + if (err) + return err; + + ioat_kobject_add(device, &ioat2_ktype); + + if (dca) + device->dca = ioat3_dca_init(pdev, device->reg_base); + + return 0; +} diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h new file mode 100644 index 00000000000..99afb12bd40 --- /dev/null +++ b/drivers/dma/ioat/hw.h @@ -0,0 +1,215 @@ +/* + * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_HW_H_ +#define _IOAT_HW_H_ + +/* PCI Configuration Space Values */ +#define IOAT_PCI_VID 0x8086 +#define IOAT_MMIO_BAR 0 + +/* CB device ID's */ +#define IOAT_PCI_DID_5000 0x1A38 +#define IOAT_PCI_DID_CNB 0x360B +#define IOAT_PCI_DID_SCNB 0x65FF +#define IOAT_PCI_DID_SNB 0x402F + +#define IOAT_PCI_RID 0x00 +#define IOAT_PCI_SVID 0x8086 +#define IOAT_PCI_SID 0x8086 +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ +#define IOAT_VER_3_2 0x32 /* Version 3.2 */ + +struct ioat_dma_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int null:1; + unsigned int src_brk:1; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd2:13; + #define IOAT_OP_COPY 0x00 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t rsv2; + /* store some driver data in an unused portion of the descriptor */ + union { + uint64_t user1; + uint64_t tx_cnt; + }; + uint64_t user2; +}; + +struct ioat_fill_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int rsvd:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int rsvd2:2; + unsigned int dest_brk:1; + unsigned int bundle:1; + unsigned int rsvd4:15; + #define IOAT_OP_FILL 0x01 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_data; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t next_dst_addr; + uint64_t user1; + uint64_t user2; +}; + +struct ioat_xor_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int rsvd:13; + #define IOAT_OP_XOR 0x87 + #define IOAT_OP_XOR_VAL 0x88 + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint64_t src_addr4; + uint64_t src_addr5; +}; + +struct ioat_xor_ext_descriptor { + uint64_t src_addr6; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t next; + uint64_t rsvd[4]; +}; + +struct ioat_pq_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:11; + #define IOAT_OP_PQ 0x89 + #define IOAT_OP_PQ_VAL 0x8a + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t src_addr3; + uint8_t coef[8]; + uint64_t q_addr; +}; + +struct ioat_pq_ext_descriptor { + uint64_t src_addr4; + uint64_t src_addr5; + uint64_t src_addr6; + uint64_t next; + uint64_t src_addr7; + uint64_t src_addr8; + uint64_t rsvd[2]; +}; + +struct ioat_pq_update_descriptor { + uint32_t size; + union { + uint32_t ctl; + struct { + unsigned int int_en:1; + unsigned int src_snoop_dis:1; + unsigned int dest_snoop_dis:1; + unsigned int compl_write:1; + unsigned int fence:1; + unsigned int src_cnt:3; + unsigned int bundle:1; + unsigned int dest_dca:1; + unsigned int hint:1; + unsigned int p_disable:1; + unsigned int q_disable:1; + unsigned int rsvd:3; + unsigned int coef:8; + #define IOAT_OP_PQ_UP 0x8b + unsigned int op:8; + } ctl_f; + }; + uint64_t src_addr; + uint64_t p_addr; + uint64_t next; + uint64_t src_addr2; + uint64_t p_src; + uint64_t q_src; + uint64_t q_addr; +}; + +struct ioat_raw_descriptor { + uint64_t field[8]; +}; +#endif diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c new file mode 100644 index 00000000000..d545fae30f3 --- /dev/null +++ b/drivers/dma/ioat/pci.c @@ -0,0 +1,210 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 - 2009 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dca.h> +#include "dma.h" +#include "dma_v2.h" +#include "registers.h" +#include "hw.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void __devexit ioat_remove(struct pci_dev *pdev); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); + +struct kmem_cache *ioat2_cache; + +#define DRV_NAME "ioatdma" + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = __devexit_p(ioat_remove), +}; + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + pci_set_master(pdev); + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version == IOAT_VER_1_2) + err = ioat1_dma_probe(device, ioat_dca_enabled); + else if (device->version == IOAT_VER_2_0) + err = ioat2_dma_probe(device, ioat_dca_enabled); + else if (device->version >= IOAT_VER_3_0) + err = ioat3_dma_probe(device, ioat_dca_enabled); + else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + return -ENODEV; + } + + return 0; +} + +static void __devexit ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat2_cache) + return -ENOMEM; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + kmem_cache_destroy(ioat2_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat2_cache); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioat/registers.h index 49bc277424f..63038e18ab0 100644 --- a/drivers/dma/ioatdma_registers.h +++ b/drivers/dma/ioat/registers.h @@ -64,18 +64,37 @@ #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ #define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 +#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002 +#define IOAT_DEVICE_MEMORY_BYPASS 0x0004 +#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008 + +#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */ +#define IOAT_CAP_PAGE_BREAK 0x00000001 +#define IOAT_CAP_CRC 0x00000002 +#define IOAT_CAP_SKIP_MARKER 0x00000004 +#define IOAT_CAP_DCA 0x00000010 +#define IOAT_CAP_CRC_MOVE 0x00000020 +#define IOAT_CAP_FILL_BLOCK 0x00000040 +#define IOAT_CAP_APIC 0x00000080 +#define IOAT_CAP_XOR 0x00000100 +#define IOAT_CAP_PQ 0x00000200 #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ /* DMA Channel Registers */ #define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ #define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200 #define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 #define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 #define IOAT_CHANCTRL_ERR_INT_EN 0x0010 #define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 -#define IOAT_CHANCTRL_INT_DISABLE 0x0001 +#define IOAT_CHANCTRL_INT_REARM 0x0001 +#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ + IOAT_CHANCTRL_ERR_COMPLETION_EN |\ + IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\ + IOAT_CHANCTRL_ERR_INT_EN) #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ #define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ @@ -94,14 +113,14 @@ #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) -#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F -#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 -#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 -#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) +#define IOAT_CHANSTS_SOFT_ERR 0x10ULL +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL +#define IOAT_CHANSTS_STATUS 0x7ULL +#define IOAT_CHANSTS_ACTIVE 0x0 +#define IOAT_CHANSTS_DONE 0x1 +#define IOAT_CHANSTS_SUSPENDED 0x2 +#define IOAT_CHANSTS_HALTED 0x3 @@ -204,22 +223,27 @@ #define IOAT_CDAR_OFFSET_HIGH 0x24 #define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ -#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 -#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 -#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 +#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001 +#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002 +#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004 +#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008 #define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 #define IOAT_CHANERR_CHANCMD_ERR 0x0020 #define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 #define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 #define IOAT_CHANERR_READ_DATA_ERR 0x0100 #define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 -#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 -#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 +#define IOAT_CHANERR_CONTROL_ERR 0x0400 +#define IOAT_CHANERR_LENGTH_ERR 0x0800 #define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 #define IOAT_CHANERR_SOFT_ERR 0x4000 #define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 +#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000 +#define IOAT_CHANERR_XOR_Q_ERR 0x20000 +#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000 + +#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR) #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c deleted file mode 100644 index a600fc0f796..00000000000 --- a/drivers/dma/ioat_dma.c +++ /dev/null @@ -1,1741 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/i7300_idle.h> -#include "ioatdma.h" -#include "ioatdma_registers.h" -#include "ioatdma_hw.h" - -#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) - -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) -static int ioat_pending_level = 4; -module_param(ioat_pending_level, int, 0644); -MODULE_PARM_DESC(ioat_pending_level, - "high-water mark for pushing ioat descriptors (default: 4)"); - -#define RESET_DELAY msecs_to_jiffies(100) -#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) -static void ioat_dma_chan_reset_part2(struct work_struct *work); -static void ioat_dma_chan_watchdog(struct work_struct *work); - -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - -/* internal functions */ -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); - -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); - -static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( - struct ioatdma_device *device, - int index) -{ - return device->idx[index]; -} - -/** - * ioat_dma_do_interrupt - handler used for single vector interrupt mode - * @irq: interrupt id - * @data: interrupt data - */ -static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) -{ - struct ioatdma_device *instance = data; - struct ioat_dma_chan *ioat_chan; - unsigned long attnstatus; - int bit; - u8 intrctrl; - - intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); - - if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) - return IRQ_NONE; - - if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { - writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); - return IRQ_NONE; - } - - attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); - for_each_bit(bit, &attnstatus, BITS_PER_LONG) { - ioat_chan = ioat_lookup_chan_by_index(instance, bit); - tasklet_schedule(&ioat_chan->cleanup_task); - } - - writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); - return IRQ_HANDLED; -} - -/** - * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode - * @irq: interrupt id - * @data: interrupt data - */ -static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) -{ - struct ioat_dma_chan *ioat_chan = data; - - tasklet_schedule(&ioat_chan->cleanup_task); - - return IRQ_HANDLED; -} - -static void ioat_dma_cleanup_tasklet(unsigned long data); - -/** - * ioat_dma_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -static int ioat_dma_enumerate_channels(struct ioatdma_device *device) -{ - u8 xfercap_scale; - u32 xfercap; - int i; - struct ioat_dma_chan *ioat_chan; - - /* - * IOAT ver.3 workarounds - */ - if (device->version == IOAT_VER_3_0) { - u32 chan_err_mask; - u16 dev_id; - u32 dmauncerrsts; - - /* - * Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - chan_err_mask = 0x3E07; - pci_write_config_dword(device->pdev, - IOAT_PCI_CHANERRMASK_INT_OFFSET, - chan_err_mask); - - /* - * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(device->pdev, - IOAT_PCI_DEVICE_ID_OFFSET, - &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - dmauncerrsts = 0x10; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - dmauncerrsts); - } - } - - device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); - -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { - device->common.chancnt--; - } -#endif - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); - if (!ioat_chan) { - device->common.chancnt = i; - break; - } - - ioat_chan->device = device; - ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); - ioat_chan->xfercap = xfercap; - ioat_chan->desccount = 0; - INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); - if (ioat_chan->device->version == IOAT_VER_2_0) - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | - IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - else if (ioat_chan->device->version == IOAT_VER_3_0) - writel(IOAT_DMA_DCA_ANY_CPU, - ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); - spin_lock_init(&ioat_chan->cleanup_lock); - spin_lock_init(&ioat_chan->desc_lock); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); - /* This should be made common somewhere in dmaengine.c */ - ioat_chan->common.device = &device->common; - list_add_tail(&ioat_chan->common.device_node, - &device->common.channels); - device->idx[i] = ioat_chan; - tasklet_init(&ioat_chan->cleanup_task, - ioat_dma_cleanup_tasklet, - (unsigned long) ioat_chan); - tasklet_disable(&ioat_chan->cleanup_task); - } - return device->common.chancnt; -} - -/** - * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended - * descriptors to hw - * @chan: DMA channel handle - */ -static inline void __ioat1_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) -{ - ioat_chan->pending = 0; - writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); -} - -static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - } -} - -static inline void __ioat2_dma_memcpy_issue_pending( - struct ioat_dma_chan *ioat_chan) -{ - ioat_chan->pending = 0; - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); -} - -static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - - if (ioat_chan->pending > 0) { - spin_lock_bh(&ioat_chan->desc_lock); - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - } -} - - -/** - * ioat_dma_chan_reset_part2 - reinit the channel after a reset - */ -static void ioat_dma_chan_reset_part2(struct work_struct *work) -{ - struct ioat_dma_chan *ioat_chan = - container_of(work, struct ioat_dma_chan, work.work); - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat_chan->cleanup_lock); - spin_lock_bh(&ioat_chan->desc_lock); - - ioat_chan->completion_virt->low = 0; - ioat_chan->completion_virt->high = 0; - ioat_chan->pending = 0; - - /* - * count the descriptors waiting, and be sure to do it - * right for both the CB1 line and the CB2 ring - */ - ioat_chan->dmacount = 0; - if (ioat_chan->used_desc.prev) { - desc = to_ioat_desc(ioat_chan->used_desc.prev); - do { - ioat_chan->dmacount++; - desc = to_ioat_desc(desc->node.next); - } while (&desc->node != ioat_chan->used_desc.next); - } - - /* - * write the new starting descriptor address - * this puts channel engine into ARMED state - */ - desc = to_ioat_desc(ioat_chan->used_desc.prev); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - break; - case IOAT_VER_2_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - /* tell the engine to go with what's left to be done */ - writew(ioat_chan->dmacount, - ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - - break; - } - dev_err(&ioat_chan->device->pdev->dev, - "chan%d reset - %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - - spin_unlock_bh(&ioat_chan->desc_lock); - spin_unlock_bh(&ioat_chan->cleanup_lock); -} - -/** - * ioat_dma_reset_channel - restart a channel - * @ioat_chan: IOAT DMA channel handle - */ -static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) -{ - u32 chansts, chanerr; - - if (!ioat_chan->used_desc.prev) - return; - - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - chansts = (ioat_chan->completion_virt->low - & IOAT_CHANSTS_DMA_TRANSFER_STATUS); - if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(ioat_chan), chansts, chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - } - - /* - * whack it upside the head with a reset - * and wait for things to settle out. - * force the pending count to a really big negative - * to make sure no one forces an issue_pending - * while we're waiting. - */ - - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->pending = INT_MIN; - writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - spin_unlock_bh(&ioat_chan->desc_lock); - - /* schedule the 2nd half instead of sleeping a long time */ - schedule_delayed_work(&ioat_chan->work, RESET_DELAY); -} - -/** - * ioat_dma_chan_watchdog - watch for stuck channels - */ -static void ioat_dma_chan_watchdog(struct work_struct *work) -{ - struct ioatdma_device *device = - container_of(work, struct ioatdma_device, work.work); - struct ioat_dma_chan *ioat_chan; - int i; - - union { - u64 full; - struct { - u32 low; - u32 high; - }; - } completion_hw; - unsigned long compl_desc_addr_hw; - - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - - if (ioat_chan->device->version == IOAT_VER_1_2 - /* have we started processing anything yet */ - && ioat_chan->last_completion - /* have we completed any since last watchdog cycle? */ - && (ioat_chan->last_completion == - ioat_chan->watchdog_completion) - /* has TCP stuck on one cookie since last watchdog? */ - && (ioat_chan->watchdog_tcp_cookie == - ioat_chan->watchdog_last_tcp_cookie) - && (ioat_chan->watchdog_tcp_cookie != - ioat_chan->completed_cookie) - /* is there something in the chain to be processed? */ - /* CB1 chain always has at least the last one processed */ - && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) - && ioat_chan->pending == 0) { - - /* - * check CHANSTS register for completed - * descriptor address. - * if it is different than completion writeback, - * it is not zero - * and it has changed since the last watchdog - * we can assume that channel - * is still working correctly - * and the problem is in completion writeback. - * update completion writeback - * with actual CHANSTS value - * else - * try resetting the channel - */ - - completion_hw.low = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); - completion_hw.high = readl(ioat_chan->reg_base + - IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); -#if (BITS_PER_LONG == 64) - compl_desc_addr_hw = - completion_hw.full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - compl_desc_addr_hw = - completion_hw.low & IOAT_LOW_COMPLETION_MASK; -#endif - - if ((compl_desc_addr_hw != 0) - && (compl_desc_addr_hw != ioat_chan->watchdog_completion) - && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { - ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; - ioat_chan->completion_virt->low = completion_hw.low; - ioat_chan->completion_virt->high = completion_hw.high; - } else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; - } - - /* - * for version 2.0 if there are descriptors yet to be processed - * and the last completed hasn't changed since the last watchdog - * if they haven't hit the pending level - * issue the pending to push them through - * else - * try resetting the channel - */ - } else if (ioat_chan->device->version == IOAT_VER_2_0 - && ioat_chan->used_desc.prev - && ioat_chan->last_completion - && ioat_chan->last_completion == ioat_chan->watchdog_completion) { - - if (ioat_chan->pending < ioat_pending_level) - ioat2_dma_memcpy_issue_pending(&ioat_chan->common); - else { - ioat_dma_reset_channel(ioat_chan); - ioat_chan->watchdog_completion = 0; - } - } else { - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_completion - = ioat_chan->last_completion; - } - - ioat_chan->watchdog_last_tcp_cookie = - ioat_chan->watchdog_tcp_cookie; - } - - schedule_delayed_work(&device->work, WATCHDOG_DELAY); -} - -static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *prev, *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - LIST_HEAD(new_chain); - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->async_tx.flags; - new = first; - - spin_lock_bh(&ioat_chan->desc_lock); - prev = to_ioat_desc(ioat_chan->used_desc.prev); - prefetch(prev->hw); - do { - copy = min_t(size_t, len, ioat_chan->xfercap); - - async_tx_ack(&new->async_tx); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - hw->next = 0; - - /* chain together the physical address list for the HW */ - wmb(); - prev->hw->next = (u64) new->async_tx.phys; - - len -= copy; - dst += copy; - src += copy; - - list_add_tail(&new->node, &new_chain); - desc_count++; - prev = new; - } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); - - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return -ENOMEM; - } - - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; - if (first != new) { - /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; - - /* write address into NextDescriptor field of last desc in chain */ - to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = - first->async_tx.phys; - list_splice_tail(&new_chain, &ioat_chan->used_desc); - - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - return cookie; -} - -static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); - struct ioat_desc_sw *first = tx_to_ioat_desc(tx); - struct ioat_desc_sw *new; - struct ioat_dma_descriptor *hw; - dma_cookie_t cookie; - u32 copy; - size_t len; - dma_addr_t src, dst; - unsigned long orig_flags; - unsigned int desc_count = 0; - - /* src and dest and len are stored in the initial descriptor */ - len = first->len; - src = first->src; - dst = first->dst; - orig_flags = first->async_tx.flags; - new = first; - - /* - * ioat_chan->desc_lock is still in force in version 2 path - * it gets unlocked at end of this function - */ - do { - copy = min_t(size_t, len, ioat_chan->xfercap); - - async_tx_ack(&new->async_tx); - - hw = new->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - desc_count++; - } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); - - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "tx submit failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return -ENOMEM; - } - - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - if (first->async_tx.callback) { - hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; - if (first != new) { - /* move callback into to last desc */ - new->async_tx.callback = first->async_tx.callback; - new->async_tx.callback_param - = first->async_tx.callback_param; - first->async_tx.callback = NULL; - first->async_tx.callback_param = NULL; - } - } - - new->tx_cnt = desc_count; - new->async_tx.flags = orig_flags; /* client is in control of this ack */ - - /* store the original values for use in later cleanup */ - if (new != first) { - new->src = first->src; - new->dst = first->dst; - new->len = first->len; - } - - /* cookie incr and addition to used_list must be atomic */ - cookie = ioat_chan->common.cookie; - cookie++; - if (cookie < 0) - cookie = 1; - ioat_chan->common.cookie = new->async_tx.cookie = cookie; - - ioat_chan->dmacount += desc_count; - ioat_chan->pending += desc_count; - if (ioat_chan->pending >= ioat_pending_level) - __ioat2_dma_memcpy_issue_pending(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - return cookie; -} - -/** - * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat_chan: the channel supplying the memory pool for the descriptors - * @flags: allocation flags - */ -static struct ioat_desc_sw *ioat_dma_alloc_descriptor( - struct ioat_dma_chan *ioat_chan, - gfp_t flags) -{ - struct ioat_dma_descriptor *desc; - struct ioat_desc_sw *desc_sw; - struct ioatdma_device *ioatdma_device; - dma_addr_t phys; - - ioatdma_device = to_ioatdma_device(ioat_chan->common.device); - desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); - if (unlikely(!desc)) - return NULL; - - desc_sw = kzalloc(sizeof(*desc_sw), flags); - if (unlikely(!desc_sw)) { - pci_pool_free(ioatdma_device->dma_pool, desc, phys); - return NULL; - } - - memset(desc, 0, sizeof(*desc)); - dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - desc_sw->async_tx.tx_submit = ioat1_tx_submit; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - desc_sw->async_tx.tx_submit = ioat2_tx_submit; - break; - } - - desc_sw->hw = desc; - desc_sw->async_tx.phys = phys; - - return desc_sw; -} - -static int ioat_initial_desc_count = 256; -module_param(ioat_initial_desc_count, int, 0644); -MODULE_PARM_DESC(ioat_initial_desc_count, - "initial descriptors per channel (default: 256)"); - -/** - * ioat2_dma_massage_chan_desc - link the descriptors into a circle - * @ioat_chan: the channel to be massaged - */ -static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *desc, *_desc; - - /* setup used_desc */ - ioat_chan->used_desc.next = ioat_chan->free_desc.next; - ioat_chan->used_desc.prev = NULL; - - /* pull free_desc out of the circle so that every node is a hw - * descriptor, but leave it pointing to the list - */ - ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; - ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; - - /* circle link the hw descriptors */ - desc = to_ioat_desc(ioat_chan->free_desc.next); - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; - list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { - desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; - } -} - -/** - * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: the channel to be filled out - */ -static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *desc; - u16 chanctrl; - u32 chanerr; - int i; - LIST_HEAD(tmp_list); - - /* have we already been set up? */ - if (!list_empty(&ioat_chan->free_desc)) - return ioat_chan->desccount; - - /* Setup register to interrupt and write completion status on error */ - chanctrl = IOAT_CHANCTRL_ERR_INT_EN | - IOAT_CHANCTRL_ANY_ERR_ABORT_EN | - IOAT_CHANCTRL_ERR_COMPLETION_EN; - writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); - - chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(&ioat_chan->device->pdev->dev, - "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - } - - /* Allocate descriptors */ - for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "Only %d initial descriptors\n", i); - break; - } - list_add_tail(&desc->node, &tmp_list); - } - spin_lock_bh(&ioat_chan->desc_lock); - ioat_chan->desccount = i; - list_splice(&tmp_list, &ioat_chan->free_desc); - if (ioat_chan->device->version != IOAT_VER_1_2) - ioat2_dma_massage_chan_desc(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - ioat_chan->completion_virt = - pci_pool_alloc(ioat_chan->device->completion_pool, - GFP_KERNEL, - &ioat_chan->completion_addr); - memset(ioat_chan->completion_virt, 0, - sizeof(*ioat_chan->completion_virt)); - writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) ioat_chan->completion_addr) >> 32, - ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - tasklet_enable(&ioat_chan->cleanup_task); - ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ - return ioat_chan->desccount; -} - -/** - * ioat_dma_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -static void ioat_dma_free_chan_resources(struct dma_chan *chan) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device); - struct ioat_desc_sw *desc, *_desc; - int in_use_descs = 0; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (ioat_chan->desccount == 0) - return; - - tasklet_disable(&ioat_chan->cleanup_task); - ioat_dma_memcpy_cleanup(ioat_chan); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - mdelay(100); - - spin_lock_bh(&ioat_chan->desc_lock); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat_chan->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - list_for_each_entry_safe(desc, _desc, - ioat_chan->free_desc.next, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - } - desc = to_ioat_desc(ioat_chan->free_desc.next); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->async_tx.phys); - kfree(desc); - INIT_LIST_HEAD(&ioat_chan->free_desc); - INIT_LIST_HEAD(&ioat_chan->used_desc); - break; - } - spin_unlock_bh(&ioat_chan->desc_lock); - - pci_pool_free(ioatdma_device->completion_pool, - ioat_chan->completion_virt, - ioat_chan->completion_addr); - - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - dev_err(&ioat_chan->device->pdev->dev, - "Freeing %d in use descriptors!\n", - in_use_descs - 1); - - ioat_chan->last_completion = ioat_chan->completion_addr = 0; - ioat_chan->pending = 0; - ioat_chan->dmacount = 0; - ioat_chan->desccount = 0; - ioat_chan->watchdog_completion = 0; - ioat_chan->last_compl_desc_addr_hw = 0; - ioat_chan->watchdog_tcp_cookie = - ioat_chan->watchdog_last_tcp_cookie = 0; -} - -/** - * ioat_dma_get_next_descriptor - return the next available descriptor - * @ioat_chan: IOAT DMA channel handle - * - * Gets the next descriptor from the chain, and must be called with the - * channel's desc_lock held. Allocates more descriptors if the channel - * has run out. - */ -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *new; - - if (!list_empty(&ioat_chan->free_desc)) { - new = to_ioat_desc(ioat_chan->free_desc.next); - list_del(&new->node); - } else { - /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); - if (!new) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); - return NULL; - } - } - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw * -ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *new; - - /* - * used.prev points to where to start processing - * used.next points to next free descriptor - * if used.prev == NULL, there are none waiting to be processed - * if used.next == used.prev.prev, there is only one free descriptor, - * and we need to use it to as a noop descriptor before - * linking in a new set of descriptors, since the device - * has probably already read the pointer to it - */ - if (ioat_chan->used_desc.prev && - ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { - - struct ioat_desc_sw *desc; - struct ioat_desc_sw *noop_desc; - int i; - - /* set up the noop descriptor */ - noop_desc = to_ioat_desc(ioat_chan->used_desc.next); - /* set size to non-zero value (channel returns error when size is 0) */ - noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; - noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; - noop_desc->hw->src_addr = 0; - noop_desc->hw->dst_addr = 0; - - ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; - ioat_chan->pending++; - ioat_chan->dmacount++; - - /* try to get a few more descriptors */ - for (i = 16; i; i--) { - desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "alloc failed\n"); - break; - } - list_add_tail(&desc->node, ioat_chan->used_desc.next); - - desc->hw->next - = to_ioat_desc(desc->node.next)->async_tx.phys; - to_ioat_desc(desc->node.prev)->hw->next - = desc->async_tx.phys; - ioat_chan->desccount++; - } - - ioat_chan->used_desc.next = noop_desc->node.next; - } - new = to_ioat_desc(ioat_chan->used_desc.next); - prefetch(new); - ioat_chan->used_desc.next = new->node.next; - - if (ioat_chan->used_desc.prev == NULL) - ioat_chan->used_desc.prev = &new->node; - - prefetch(new->hw); - return new; -} - -static struct ioat_desc_sw *ioat_dma_get_next_descriptor( - struct ioat_dma_chan *ioat_chan) -{ - if (!ioat_chan) - return NULL; - - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - return ioat1_dma_get_next_descriptor(ioat_chan); - case IOAT_VER_2_0: - case IOAT_VER_3_0: - return ioat2_dma_get_next_descriptor(ioat_chan); - } - return NULL; -} - -static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat_chan->desc_lock); - new = ioat_dma_get_next_descriptor(ioat_chan); - spin_unlock_bh(&ioat_chan->desc_lock); - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; - } else { - dev_err(&ioat_chan->device->pdev->dev, - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - return NULL; - } -} - -static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( - struct dma_chan *chan, - dma_addr_t dma_dest, - dma_addr_t dma_src, - size_t len, - unsigned long flags) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - struct ioat_desc_sw *new; - - spin_lock_bh(&ioat_chan->desc_lock); - new = ioat2_dma_get_next_descriptor(ioat_chan); - - /* - * leave ioat_chan->desc_lock set in ioat 2 path - * it will get unlocked at end of tx_submit - */ - - if (new) { - new->len = len; - new->dst = dma_dest; - new->src = dma_src; - new->async_tx.flags = flags; - return &new->async_tx; - } else { - spin_unlock_bh(&ioat_chan->desc_lock); - dev_err(&ioat_chan->device->pdev->dev, - "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", - chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); - return NULL; - } -} - -static void ioat_dma_cleanup_tasklet(unsigned long data) -{ - struct ioat_dma_chan *chan = (void *)data; - ioat_dma_memcpy_cleanup(chan); - writew(IOAT_CHANCTRL_INT_DISABLE, - chan->reg_base + IOAT_CHANCTRL_OFFSET); -} - -static void -ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) -{ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - else - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - } - - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - pci_unmap_single(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - else - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - } -} - -/** - * ioat_dma_memcpy_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - */ -static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) -{ - unsigned long phys_complete; - struct ioat_desc_sw *desc, *_desc; - dma_cookie_t cookie = 0; - unsigned long desc_phys; - struct ioat_desc_sw *latest_desc; - - prefetch(ioat_chan->completion_virt); - - if (!spin_trylock_bh(&ioat_chan->cleanup_lock)) - return; - - /* The completion writeback can happen at any time, - so reads by the driver need to be atomic operations - The descriptor physical addresses are limited to 32-bits - when the CPU can only do a 32-bit mov */ - -#if (BITS_PER_LONG == 64) - phys_complete = - ioat_chan->completion_virt->full - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; -#else - phys_complete = - ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; -#endif - - if ((ioat_chan->completion_virt->full - & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == - IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { - dev_err(&ioat_chan->device->pdev->dev, - "Channel halted, chanerr = %x\n", - readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); - - /* TODO do something to salvage the situation */ - } - - if (phys_complete == ioat_chan->last_completion) { - spin_unlock_bh(&ioat_chan->cleanup_lock); - /* - * perhaps we're stuck so hard that the watchdog can't go off? - * try to catch it after 2 seconds - */ - if (ioat_chan->device->version != IOAT_VER_3_0) { - if (time_after(jiffies, - ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { - ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); - ioat_chan->last_completion_time = jiffies; - } - } - return; - } - ioat_chan->last_completion_time = jiffies; - - cookie = 0; - if (!spin_trylock_bh(&ioat_chan->desc_lock)) { - spin_unlock_bh(&ioat_chan->cleanup_lock); - return; - } - - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - list_for_each_entry_safe(desc, _desc, - &ioat_chan->used_desc, node) { - - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. - */ - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; - ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; - } - } - - if (desc->async_tx.phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(&desc->async_tx)) { - list_move_tail(&desc->node, - &ioat_chan->free_desc); - } else - desc->async_tx.cookie = 0; - } else { - /* - * last used desc. Do not remove, so we can - * append from it, but don't look at it next - * time, either - */ - desc->async_tx.cookie = 0; - - /* TODO check status bits? */ - break; - } - } - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - /* has some other thread has already cleaned up? */ - if (ioat_chan->used_desc.prev == NULL) - break; - - /* work backwards to find latest finished desc */ - desc = to_ioat_desc(ioat_chan->used_desc.next); - latest_desc = NULL; - do { - desc = to_ioat_desc(desc->node.prev); - desc_phys = (unsigned long)desc->async_tx.phys - & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; - if (desc_phys == phys_complete) { - latest_desc = desc; - break; - } - } while (&desc->node != ioat_chan->used_desc.prev); - - if (latest_desc != NULL) { - - /* work forwards to clear finished descriptors */ - for (desc = to_ioat_desc(ioat_chan->used_desc.prev); - &desc->node != latest_desc->node.next && - &desc->node != ioat_chan->used_desc.next; - desc = to_ioat_desc(desc->node.next)) { - if (desc->async_tx.cookie) { - cookie = desc->async_tx.cookie; - desc->async_tx.cookie = 0; - ioat_dma_unmap(ioat_chan, desc); - if (desc->async_tx.callback) { - desc->async_tx.callback(desc->async_tx.callback_param); - desc->async_tx.callback = NULL; - } - } - } - - /* move used.prev up beyond those that are finished */ - if (&desc->node == ioat_chan->used_desc.next) - ioat_chan->used_desc.prev = NULL; - else - ioat_chan->used_desc.prev = &desc->node; - } - break; - } - - spin_unlock_bh(&ioat_chan->desc_lock); - - ioat_chan->last_completion = phys_complete; - if (cookie != 0) - ioat_chan->completed_cookie = cookie; - - spin_unlock_bh(&ioat_chan->cleanup_lock); -} - -/** - * ioat_dma_is_complete - poll the status of a IOAT DMA transaction - * @chan: IOAT DMA channel handle - * @cookie: DMA transaction identifier - * @done: if not %NULL, updated with last completed transaction - * @used: if not %NULL, updated with last used transaction - */ -static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, - dma_cookie_t cookie, - dma_cookie_t *done, - dma_cookie_t *used) -{ - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - dma_cookie_t last_used; - dma_cookie_t last_complete; - enum dma_status ret; - - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; - ioat_chan->watchdog_tcp_cookie = cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - ret = dma_async_is_complete(cookie, last_complete, last_used); - if (ret == DMA_SUCCESS) - return ret; - - ioat_dma_memcpy_cleanup(ioat_chan); - - last_used = chan->cookie; - last_complete = ioat_chan->completed_cookie; - - if (done) - *done = last_complete; - if (used) - *used = last_used; - - return dma_async_is_complete(cookie, last_complete, last_used); -} - -static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) -{ - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat_chan->desc_lock); - - desc = ioat_dma_get_next_descriptor(ioat_chan); - - if (!desc) { - dev_err(&ioat_chan->device->pdev->dev, - "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat_chan->desc_lock); - return; - } - - desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL - | IOAT_DMA_DESCRIPTOR_CTL_INT_GN - | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - /* set size to non-zero value (channel returns error when size is 0) */ - desc->hw->size = NULL_DESC_BUFFER_SIZE; - desc->hw->src_addr = 0; - desc->hw->dst_addr = 0; - async_tx_ack(&desc->async_tx); - switch (ioat_chan->device->version) { - case IOAT_VER_1_2: - desc->hw->next = 0; - list_add_tail(&desc->node, &ioat_chan->used_desc); - - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); - - writeb(IOAT_CHANCMD_START, ioat_chan->reg_base - + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(((u64) desc->async_tx.phys) >> 32, - ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); - - ioat_chan->dmacount++; - __ioat2_dma_memcpy_issue_pending(ioat_chan); - break; - } - spin_unlock_bh(&ioat_chan->desc_lock); -} - -/* - * Perform a IOAT transaction to verify the HW works. - */ -#define IOAT_TEST_SIZE 2000 - -static void ioat_dma_test_callback(void *dma_async_param) -{ - struct completion *cmp = dma_async_param; - - complete(cmp); -} - -/** - * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. - * @device: device to be tested - */ -static int ioat_dma_self_test(struct ioatdma_device *device) -{ - int i; - u8 *src; - u8 *dest; - struct dma_chan *dma_chan; - struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - int err = 0; - struct completion cmp; - unsigned long tmo; - unsigned long flags; - - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } - - /* Fill in src buffer */ - for (i = 0; i < IOAT_TEST_SIZE; i++) - src[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(device->common.channels.next, - struct dma_chan, - device_node); - if (device->common.device_alloc_chan_resources(dma_chan) < 1) { - dev_err(&device->pdev->dev, - "selftest cannot allocate chan resource\n"); - err = -ENODEV; - goto out; - } - - dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, - DMA_TO_DEVICE); - dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, - DMA_FROM_DEVICE); - flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; - tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, flags); - if (!tx) { - dev_err(&device->pdev->dev, - "Self-test prep failed, disabling\n"); - err = -ENODEV; - goto free_resources; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(&device->pdev->dev, - "Self-test setup failed, disabling\n"); - err = -ENODEV; - goto free_resources; - } - device->common.device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) - != DMA_SUCCESS) { - dev_err(&device->pdev->dev, - "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto free_resources; - } - if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(&device->pdev->dev, - "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; - } - -free_resources: - device->common.device_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -static char ioat_interrupt_style[32] = "msix"; -module_param_string(ioat_interrupt_style, ioat_interrupt_style, - sizeof(ioat_interrupt_style), 0644); -MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), " - "msix-single-vector, msi, intx)"); - -/** - * ioat_dma_setup_interrupts - setup interrupt handler - * @device: ioat device - */ -static int ioat_dma_setup_interrupts(struct ioatdma_device *device) -{ - struct ioat_dma_chan *ioat_chan; - int err, i, j, msixcnt; - u8 intrctrl = 0; - - if (!strcmp(ioat_interrupt_style, "msix")) - goto msix; - if (!strcmp(ioat_interrupt_style, "msix-single-vector")) - goto msix_single_vector; - if (!strcmp(ioat_interrupt_style, "msi")) - goto msi; - if (!strcmp(ioat_interrupt_style, "intx")) - goto intx; - dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", - ioat_interrupt_style); - goto err_no_irq; - -msix: - /* The number of MSI-X vectors should equal the number of channels */ - msixcnt = device->common.chancnt; - for (i = 0; i < msixcnt; i++) - device->msix_entries[i].entry = i; - - err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); - if (err < 0) - goto msi; - if (err > 0) - goto msix_single_vector; - - for (i = 0; i < msixcnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - err = request_irq(device->msix_entries[i].vector, - ioat_dma_do_interrupt_msix, - 0, "ioat-msix", ioat_chan); - if (err) { - for (j = 0; j < i; j++) { - ioat_chan = - ioat_lookup_chan_by_index(device, j); - free_irq(device->msix_entries[j].vector, - ioat_chan); - } - goto msix_single_vector; - } - } - intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = msix_multi_vector; - goto done; - -msix_single_vector: - device->msix_entries[0].entry = 0; - err = pci_enable_msix(device->pdev, device->msix_entries, 1); - if (err) - goto msi; - - err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, - 0, "ioat-msix", device); - if (err) { - pci_disable_msix(device->pdev); - goto msi; - } - device->irq_mode = msix_single_vector; - goto done; - -msi: - err = pci_enable_msi(device->pdev); - if (err) - goto intx; - - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - 0, "ioat-msi", device); - if (err) { - pci_disable_msi(device->pdev); - goto intx; - } - /* - * CB 1.2 devices need a bit set in configuration space to enable MSI - */ - if (device->version == IOAT_VER_1_2) { - u32 dmactrl; - pci_read_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(device->pdev, - IOAT_PCI_DMACTRL_OFFSET, dmactrl); - } - device->irq_mode = msi; - goto done; - -intx: - err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); - if (err) - goto err_no_irq; - device->irq_mode = intx; - -done: - intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; - writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); - return 0; - -err_no_irq: - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - dev_err(&device->pdev->dev, "no usable interrupts\n"); - device->irq_mode = none; - return -1; -} - -/** - * ioat_dma_remove_interrupts - remove whatever interrupts were set - * @device: ioat device - */ -static void ioat_dma_remove_interrupts(struct ioatdma_device *device) -{ - struct ioat_dma_chan *ioat_chan; - int i; - - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - - switch (device->irq_mode) { - case msix_multi_vector: - for (i = 0; i < device->common.chancnt; i++) { - ioat_chan = ioat_lookup_chan_by_index(device, i); - free_irq(device->msix_entries[i].vector, ioat_chan); - } - pci_disable_msix(device->pdev); - break; - case msix_single_vector: - free_irq(device->msix_entries[0].vector, device); - pci_disable_msix(device->pdev); - break; - case msi: - free_irq(device->pdev->irq, device); - pci_disable_msi(device->pdev); - break; - case intx: - free_irq(device->pdev->irq, device); - break; - case none: - dev_warn(&device->pdev->dev, - "call to %s without interrupts setup\n", __func__); - } - device->irq_mode = none; -} - -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase) -{ - int err; - struct ioatdma_device *device; - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) { - err = -ENOMEM; - goto err_kzalloc; - } - device->pdev = pdev; - device->reg_base = iobase; - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - - /* DMA coherent memory pool for DMA descriptor allocations */ - device->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!device->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - device->completion_pool = pci_pool_create("completion_pool", pdev, - sizeof(u64), SMP_CACHE_BYTES, - SMP_CACHE_BYTES); - if (!device->completion_pool) { - err = -ENOMEM; - goto err_completion_pool; - } - - INIT_LIST_HEAD(&device->common.channels); - ioat_dma_enumerate_channels(device); - - device->common.device_alloc_chan_resources = - ioat_dma_alloc_chan_resources; - device->common.device_free_chan_resources = - ioat_dma_free_chan_resources; - device->common.dev = &pdev->dev; - - dma_cap_set(DMA_MEMCPY, device->common.cap_mask); - device->common.device_is_tx_complete = ioat_dma_is_complete; - switch (device->version) { - case IOAT_VER_1_2: - device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - device->common.device_issue_pending = - ioat1_dma_memcpy_issue_pending; - break; - case IOAT_VER_2_0: - case IOAT_VER_3_0: - device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; - device->common.device_issue_pending = - ioat2_dma_memcpy_issue_pending; - break; - } - - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine found," - " %d channels, device version 0x%02x, driver version %s\n", - device->common.chancnt, device->version, IOAT_DMA_VERSION); - - if (!device->common.chancnt) { - dev_err(&device->pdev->dev, - "Intel(R) I/OAT DMA Engine problem found: " - "zero channels detected\n"); - goto err_setup_interrupts; - } - - err = ioat_dma_setup_interrupts(device); - if (err) - goto err_setup_interrupts; - - err = ioat_dma_self_test(device); - if (err) - goto err_self_test; - - ioat_set_tcp_copy_break(device); - - dma_async_device_register(&device->common); - - if (device->version != IOAT_VER_3_0) { - INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); - schedule_delayed_work(&device->work, - WATCHDOG_DELAY); - } - - return device; - -err_self_test: - ioat_dma_remove_interrupts(device); -err_setup_interrupts: - pci_pool_destroy(device->completion_pool); -err_completion_pool: - pci_pool_destroy(device->dma_pool); -err_dma_pool: - kfree(device); -err_kzalloc: - dev_err(&pdev->dev, - "Intel(R) I/OAT DMA Engine initialization failed\n"); - return NULL; -} - -void ioat_dma_remove(struct ioatdma_device *device) -{ - struct dma_chan *chan, *_chan; - struct ioat_dma_chan *ioat_chan; - - if (device->version != IOAT_VER_3_0) - cancel_delayed_work(&device->work); - - ioat_dma_remove_interrupts(device); - - dma_async_device_unregister(&device->common); - - pci_pool_destroy(device->dma_pool); - pci_pool_destroy(device->completion_pool); - - iounmap(device->reg_base); - pci_release_regions(device->pdev); - pci_disable_device(device->pdev); - - list_for_each_entry_safe(chan, _chan, - &device->common.channels, device_node) { - ioat_chan = to_ioat_chan(chan); - list_del(&chan->device_node); - kfree(ioat_chan); - } - kfree(device); -} - diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h deleted file mode 100644 index a52ff4bd460..00000000000 --- a/drivers/dma/ioatdma.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_H -#define IOATDMA_H - -#include <linux/dmaengine.h> -#include "ioatdma_hw.h" -#include <linux/init.h> -#include <linux/dmapool.h> -#include <linux/cache.h> -#include <linux/pci_ids.h> -#include <net/tcp.h> - -#define IOAT_DMA_VERSION "3.64" - -enum ioat_interrupt { - none = 0, - msix_multi_vector = 1, - msix_single_vector = 2, - msi = 3, - intx = 4, -}; - -#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 -#define IOAT_DMA_DCA_ANY_CPU ~0 -#define IOAT_WATCHDOG_PERIOD (2 * HZ) - - -/** - * struct ioatdma_device - internal representation of a IOAT device - * @pdev: PCI-Express device - * @reg_base: MMIO register space base address - * @dma_pool: for allocating DMA descriptors - * @common: embedded struct dma_device - * @version: version of ioatdma device - * @irq_mode: which style irq to use - * @msix_entries: irq handlers - * @idx: per channel data - */ - -struct ioatdma_device { - struct pci_dev *pdev; - void __iomem *reg_base; - struct pci_pool *dma_pool; - struct pci_pool *completion_pool; - struct dma_device common; - u8 version; - enum ioat_interrupt irq_mode; - struct delayed_work work; - struct msix_entry msix_entries[4]; - struct ioat_dma_chan *idx[4]; -}; - -/** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - - void __iomem *reg_base; - - dma_cookie_t completed_cookie; - unsigned long last_completion; - unsigned long last_completion_time; - - size_t xfercap; /* XFERCAP register value expanded out */ - - spinlock_t cleanup_lock; - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; - unsigned long watchdog_completion; - int watchdog_tcp_cookie; - u32 watchdog_last_tcp_cookie; - struct delayed_work work; - - int pending; - int dmacount; - int desccount; - - struct ioatdma_device *device; - struct dma_chan common; - - dma_addr_t completion_addr; - union { - u64 full; /* HW completion writeback */ - struct { - u32 low; - u32 high; - }; - } *completion_virt; - unsigned long last_compl_desc_addr_hw; - struct tasklet_struct cleanup_task; -}; - -/* wrapper around hardware descriptor format + additional software fields */ - -/** - * struct ioat_desc_sw - wrapper around hardware descriptor - * @hw: hardware DMA descriptor - * @node: this descriptor will either be on the free list, - * or attached to a transaction list (async_tx.tx_list) - * @tx_cnt: number of descriptors required to complete the transaction - * @async_tx: the generic software descriptor for all engines - */ -struct ioat_desc_sw { - struct ioat_dma_descriptor *hw; - struct list_head node; - int tx_cnt; - size_t len; - dma_addr_t src; - dma_addr_t dst; - struct dma_async_tx_descriptor async_tx; -}; - -static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) -{ - #ifdef CONFIG_NET_DMA - switch (dev->version) { - case IOAT_VER_1_2: - sysctl_tcp_dma_copybreak = 4096; - break; - case IOAT_VER_2_0: - sysctl_tcp_dma_copybreak = 2048; - break; - case IOAT_VER_3_0: - sysctl_tcp_dma_copybreak = 262144; - break; - } - #endif -} - -#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) -struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, - void __iomem *iobase); -void ioat_dma_remove(struct ioatdma_device *device); -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -#else -#define ioat_dma_probe(pdev, iobase) NULL -#define ioat_dma_remove(device) do { } while (0) -#define ioat_dca_init(pdev, iobase) NULL -#define ioat2_dca_init(pdev, iobase) NULL -#define ioat3_dca_init(pdev, iobase) NULL -#endif - -#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h deleted file mode 100644 index afa57eef86c..00000000000 --- a/drivers/dma/ioatdma_hw.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef _IOAT_HW_H_ -#define _IOAT_HW_H_ - -/* PCI Configuration Space Values */ -#define IOAT_PCI_VID 0x8086 - -/* CB device ID's */ -#define IOAT_PCI_DID_5000 0x1A38 -#define IOAT_PCI_DID_CNB 0x360B -#define IOAT_PCI_DID_SCNB 0x65FF -#define IOAT_PCI_DID_SNB 0x402F - -#define IOAT_PCI_RID 0x00 -#define IOAT_PCI_SVID 0x8086 -#define IOAT_PCI_SID 0x8086 -#define IOAT_VER_1_2 0x12 /* Version 1.2 */ -#define IOAT_VER_2_0 0x20 /* Version 2.0 */ -#define IOAT_VER_3_0 0x30 /* Version 3.0 */ - -struct ioat_dma_descriptor { - uint32_t size; - uint32_t ctl; - uint64_t src_addr; - uint64_t dst_addr; - uint64_t next; - uint64_t rsv1; - uint64_t rsv2; - uint64_t user1; - uint64_t user2; -}; - -#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002 -#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004 -#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 -#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 -#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 -#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 -#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 -#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 -#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 -#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 - -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 - -#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 -#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 - -#endif diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 2f052265122..645ca8d54ec 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -31,6 +31,7 @@ #include <linux/platform_device.h> #include <linux/memory.h> #include <linux/ioport.h> +#include <linux/raid/pq.h> #include <mach/adma.h> @@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) } } +static void +iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt; + dma_addr_t addr; + dma_addr_t dest; + + src_cnt = unmap->unmap_src_cnt; + dest = iop_desc_get_dest_addr(unmap, iop_chan); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + enum dma_data_direction dir; + + if (src_cnt > 1) /* is xor? */ + dir = DMA_BIDIRECTIONAL; + else + dir = DMA_FROM_DEVICE; + + dma_unmap_page(dev, dest, len, dir); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt); + if (addr == dest) + continue; + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + } + desc->group_head = NULL; +} + +static void +iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) +{ + struct dma_async_tx_descriptor *tx = &desc->async_tx; + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = tx->flags; + u32 src_cnt = unmap->unmap_src_cnt; + dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan); + dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan); + int i; + + if (tx->flags & DMA_PREP_CONTINUE) + src_cnt -= 3; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + dma_addr_t addr; + + for (i = 0; i < src_cnt; i++) { + addr = iop_desc_get_src_addr(unmap, iop_chan, i); + dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); + } + if (desc->pq_check_result) { + dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE); + dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE); + } + } + + desc->group_head = NULL; +} + + static dma_cookie_t iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, struct iop_adma_chan *iop_chan, dma_cookie_t cookie) { - BUG_ON(desc->async_tx.cookie < 0); - if (desc->async_tx.cookie > 0) { - cookie = desc->async_tx.cookie; - desc->async_tx.cookie = 0; + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie < 0); + if (tx->cookie > 0) { + cookie = tx->cookie; + tx->cookie = 0; /* call the callback (must not sleep or submit new * operations to this channel) */ - if (desc->async_tx.callback) - desc->async_tx.callback( - desc->async_tx.callback_param); + if (tx->callback) + tx->callback(tx->callback_param); /* unmap dma addresses * (unmap_single vs unmap_page?) */ if (desc->group_head && desc->unmap_len) { - struct iop_adma_desc_slot *unmap = desc->group_head; - struct device *dev = - &iop_chan->device->pdev->dev; - u32 len = unmap->unmap_len; - enum dma_ctrl_flags flags = desc->async_tx.flags; - u32 src_cnt; - dma_addr_t addr; - dma_addr_t dest; - - src_cnt = unmap->unmap_src_cnt; - dest = iop_desc_get_dest_addr(unmap, iop_chan); - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - enum dma_data_direction dir; - - if (src_cnt > 1) /* is xor? */ - dir = DMA_BIDIRECTIONAL; - else - dir = DMA_FROM_DEVICE; - - dma_unmap_page(dev, dest, len, dir); - } - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - while (src_cnt--) { - addr = iop_desc_get_src_addr(unmap, - iop_chan, - src_cnt); - if (addr == dest) - continue; - dma_unmap_page(dev, addr, len, - DMA_TO_DEVICE); - } - } - desc->group_head = NULL; + if (iop_desc_is_pq(desc)) + iop_desc_unmap_pq(iop_chan, desc); + else + iop_desc_unmap(iop_chan, desc); } } /* run dependent operations */ - dma_run_dependencies(&desc->async_tx); + dma_run_dependencies(tx); return cookie; } @@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data) { struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; - spin_lock(&iop_chan->lock); + /* lockdep will flag depedency submissions as potentially + * recursive locking, this is not the case as a dependency + * submission will never recurse a channels submit routine. + * There are checks in async_tx.c to prevent this. + */ + spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING); __iop_adma_slot_cleanup(iop_chan); spin_unlock(&iop_chan->lock); } @@ -370,7 +421,7 @@ retry: } alloc_tail->group_head = alloc_start; alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->async_tx.tx_list); + list_splice(&chain, &alloc_tail->tx_list); iop_chan->last_used = last_used; iop_desc_clear_next_desc(alloc_start); iop_desc_clear_next_desc(alloc_tail); @@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) old_chain_tail = list_entry(iop_chan->chain.prev, struct iop_adma_desc_slot, chain_node); - list_splice_init(&sw_desc->async_tx.tx_list, + list_splice_init(&sw_desc->tx_list, &old_chain_tail->chain_node); /* fix up the hardware chain */ @@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = iop_adma_tx_submit; + INIT_LIST_HEAD(&slot->tx_list); INIT_LIST_HEAD(&slot->chain_node); INIT_LIST_HEAD(&slot->slot_node); hw_desc = (char *) iop_chan->device->dma_desc_pool; @@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, } static struct dma_async_tx_descriptor * -iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, - unsigned int src_cnt, size_t len, u32 *result, - unsigned long flags) +iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, + unsigned int src_cnt, size_t len, u32 *result, + unsigned long flags) { struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); struct iop_adma_desc_slot *sw_desc, *grp_start; @@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, return sw_desc ? &sw_desc->async_tx : NULL; } +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + int continue_srcs; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, + "%s src_cnt: %d len: %u flags: %lx\n", + __func__, src_cnt, len, flags); + + if (dmaf_p_disabled_continue(flags)) + continue_srcs = 1+src_cnt; + else if (dmaf_continue(flags)) + continue_srcs = 3+src_cnt; + else + continue_srcs = 0+src_cnt; + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + int i; + + g = sw_desc->group_head; + iop_desc_set_byte_count(g, iop_chan, len); + + /* even if P is disabled its destination address (bits + * [3:0]) must match Q. It is ok if P points to an + * invalid address, it won't be written. + */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1] & 0x7; + + iop_desc_set_pq_addr(g, dst); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + for (i = 0; i < src_cnt; i++) + iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); + + /* if we are continuing a previous operation factor in + * the old p and q values, see the comment for dma_maxpq + * in include/linux/dmaengine.h + */ + if (dmaf_p_disabled_continue(flags)) + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + else if (dmaf_continue(flags)) { + iop_desc_set_pq_src_addr(g, i++, dst[0], 0); + iop_desc_set_pq_src_addr(g, i++, dst[1], 1); + iop_desc_set_pq_src_addr(g, i++, dst[1], 0); + } + iop_desc_init_pq(g, i, flags); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, enum sum_check_flags *pqres, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *g; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); + + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", + __func__, src_cnt, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + /* for validate operations p and q are tagged onto the + * end of the source list + */ + int pq_idx = src_cnt; + + g = sw_desc->group_head; + iop_desc_init_pq_zero_sum(g, src_cnt+2, flags); + iop_desc_set_pq_zero_sum_byte_count(g, len); + g->pq_check_result = pqres; + pr_debug("\t%s: g->pq_check_result: %p\n", + __func__, g->pq_check_result); + sw_desc->unmap_src_cnt = src_cnt+2; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, + src[src_cnt], + scf[src_cnt]); + iop_desc_set_pq_zero_sum_addr(g, pq_idx, src); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + static void iop_adma_free_chan_resources(struct dma_chan *chan) { struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); @@ -906,7 +1070,7 @@ out: #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ static int __devinit -iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) +iop_adma_xor_val_self_test(struct iop_adma_device *device) { int i, src_idx; struct page *dest; @@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) PAGE_SIZE, DMA_TO_DEVICE); /* skip zero sum if the capability is not present */ - if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) + if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) goto free_resources; /* zero sum the sources with the destintation page */ @@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) dma_srcs[i] = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); cookie = iop_adma_tx_submit(tx); iop_adma_issue_pending(dma_chan); @@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) dma_srcs[i] = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 0, PAGE_SIZE, DMA_TO_DEVICE); - tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, - IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, - &zero_sum_result, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); cookie = iop_adma_tx_submit(tx); iop_adma_issue_pending(dma_chan); @@ -1105,6 +1269,170 @@ out: return err; } +#ifdef CONFIG_MD_RAID6_PQ +static int __devinit +iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) +{ + /* combined sources, software pq results, and extra hw pq results */ + struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2]; + /* ptr to the extra hw pq buffers defined above */ + struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2]; + /* address conversion buffers (dma_map / page_address) */ + void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2]; + dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST]; + dma_addr_t pq_dest[2]; + + int i; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u32 zero_sum_result; + int err = 0; + struct device *dev; + + dev_dbg(device->common.dev, "%s\n", __func__); + + for (i = 0; i < ARRAY_SIZE(pq); i++) { + pq[i] = alloc_page(GFP_KERNEL); + if (!pq[i]) { + while (i--) + __free_page(pq[i]); + return -ENOMEM; + } + } + + /* Fill in src buffers */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { + pq_sw[i] = page_address(pq[i]); + memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE); + } + pq_sw[i] = page_address(pq[i]); + pq_sw[i+1] = page_address(pq[i+1]); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + err = -ENODEV; + goto out; + } + + dev = dma_chan->device->dev; + + /* initialize the dests */ + memset(page_address(pq_hw[0]), 0 , PAGE_SIZE); + memset(page_address(pq_hw[1]), 0 , PAGE_SIZE); + + /* test pq */ + pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE); + pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src, + IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp, + PAGE_SIZE, + DMA_PREP_INTERRUPT | + DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw); + + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST], + page_address(pq_hw[0]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test p failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1], + page_address(pq_hw[1]), PAGE_SIZE) != 0) { + dev_err(dev, "Self-test q failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + /* test correct zero sum using the software generated pq values */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = ~0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 0) { + dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + + /* test incorrect zero sum */ + i = IOP_ADMA_NUM_SRC_TEST; + memset(pq_sw[i] + 100, 0, 100); + memset(pq_sw[i+1] + 200, 0, 200); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) + pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + + zero_sum_result = 0; + tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], + pq_src, IOP_ADMA_NUM_SRC_TEST, + raid6_gfexp, PAGE_SIZE, &zero_sum_result, + DMA_PREP_INTERRUPT|DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) { + dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n", + zero_sum_result); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + i = ARRAY_SIZE(pq); + while (i--) + __free_page(pq[i]); + return err; +} +#endif + static int __devexit iop_adma_remove(struct platform_device *dev) { struct iop_adma_device *device = platform_get_drvdata(dev); @@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dma_dev->max_xor = iop_adma_get_max_xor(); dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; } - if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) - dma_dev->device_prep_dma_zero_sum = - iop_adma_prep_dma_zero_sum; + if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_xor_val = + iop_adma_prep_dma_xor_val; + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { + dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0); + dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq; + } + if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) + dma_dev->device_prep_dma_pq_val = + iop_adma_prep_dma_pq_val; if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) dma_dev->device_prep_dma_interrupt = iop_adma_prep_dma_interrupt; @@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || - dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { - ret = iop_adma_xor_zero_sum_self_test(adev); + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + ret = iop_adma_xor_val_self_test(adev); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_iop_chan; } + if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { + #ifdef CONFIG_MD_RAID6_PQ + ret = iop_adma_pq_zero_sum_self_test(adev); + dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); + #else + /* can not test raid6, so do not publish capability */ + dma_cap_clear(DMA_PQ, dma_dev->cap_mask); + dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask); + ret = 0; + #endif + if (ret) + goto err_free_iop_chan; + } + dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " - "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", - dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", - dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", + "( %s%s%s%s%s%s%s)\n", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", + dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", - dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", - dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", + dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", - dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); @@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) if (sw_desc) { grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); async_tx_ack(&sw_desc->async_tx); iop_desc_init_memcpy(grp_start, 0); iop_desc_set_byte_count(grp_start, iop_chan, 0); @@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); if (sw_desc) { grp_start = sw_desc->group_head; - list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + list_splice_init(&sw_desc->tx_list, &iop_chan->chain); async_tx_ack(&sw_desc->async_tx); iop_desc_init_null_xor(grp_start, 2, 0); iop_desc_set_byte_count(grp_start, iop_chan, 0); diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c index 9f6fe46a9b8..c0a272c7368 100644 --- a/drivers/dma/iovlock.c +++ b/drivers/dma/iovlock.c @@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, iov_byte_offset, kdata, copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } len -= copy; iov[iovec_idx].iov_len -= copy; @@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, page, offset, copy); + /* poll for a descriptor slot */ + if (unlikely(dma_cookie < 0)) { + dma_async_issue_pending(chan); + continue; + } len -= copy; iov[iovec_idx].iov_len -= copy; diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 3f23eabe09f..466ab10c1ff 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -517,7 +517,7 @@ retry: } alloc_tail->group_head = alloc_start; alloc_tail->async_tx.cookie = -EBUSY; - list_splice(&chain, &alloc_tail->async_tx.tx_list); + list_splice(&chain, &alloc_tail->tx_list); mv_chan->last_used = last_used; mv_desc_clear_next_desc(alloc_start); mv_desc_clear_next_desc(alloc_tail); @@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = mv_desc_assign_cookie(mv_chan, sw_desc); if (list_empty(&mv_chan->chain)) - list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); + list_splice_init(&sw_desc->tx_list, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, chain_node); - list_splice_init(&grp_start->async_tx.tx_list, + list_splice_init(&grp_start->tx_list, &old_chain_tail->chain_node); if (!mv_can_chain(grp_start)) @@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) slot->async_tx.tx_submit = mv_xor_tx_submit; INIT_LIST_HEAD(&slot->chain_node); INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->tx_list); hw_desc = (char *) mv_chan->device->dma_desc_pool; slot->async_tx.phys = (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 06cafe1ef52..977b592e976 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -126,9 +126,8 @@ struct mv_xor_chan { * @idx: pool index * @unmap_src_cnt: number of xor sources * @unmap_len: transaction bytecount + * @tx_list: list of slots that make up a multi-descriptor transaction * @async_tx: support for the async_tx api - * @group_list: list of slots that make up a multi-descriptor transaction - * for example transfer lengths larger than the supported hw max * @xor_check_result: result of zero sum * @crc32_result: result crc calculation */ @@ -145,6 +144,7 @@ struct mv_xor_desc_slot { u16 unmap_src_cnt; u32 value; size_t unmap_len; + struct list_head tx_list; struct dma_async_tx_descriptor async_tx; union { u32 *xor_check_result; diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c new file mode 100644 index 00000000000..b3b065c4e5c --- /dev/null +++ b/drivers/dma/shdma.c @@ -0,0 +1,786 @@ +/* + * Renesas SuperH DMA Engine support + * + * base is drivers/dma/flsdma.c + * + * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * - DMA of SuperH does not have Hardware DMA chain mode. + * - MAX DMA size is 16MB. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/platform_device.h> +#include <cpu/dma.h> +#include <asm/dma-sh.h> +#include "shdma.h" + +/* DMA descriptor control */ +#define DESC_LAST (-1) +#define DESC_COMP (1) +#define DESC_NCOMP (0) + +#define NR_DESCS_PER_CHANNEL 32 +/* + * Define the default configuration for dual address memory-memory transfer. + * The 0x400 value represents auto-request, external->external. + * + * And this driver set 4byte burst mode. + * If you want to change mode, you need to change RS_DEFAULT of value. + * (ex 1byte burst mode -> (RS_DUAL & ~TS_32) + */ +#define RS_DEFAULT (RS_DUAL) + +#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) +static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) +{ + ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); +} + +static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg) +{ + return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); +} + +static void dmae_init(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = RS_DEFAULT; /* default is DUAL mode */ + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +/* + * Reset DMA controller + * + * SH7780 has two DMAOR register + */ +static void sh_dmae_ctl_stop(int id) +{ + unsigned short dmaor = dmaor_read_reg(id); + + dmaor &= ~(DMAOR_NMIF | DMAOR_AE); + dmaor_write_reg(id, dmaor); +} + +static int sh_dmae_rst(int id) +{ + unsigned short dmaor; + + sh_dmae_ctl_stop(id); + dmaor = (dmaor_read_reg(id)|DMAOR_INIT); + + dmaor_write_reg(id, dmaor); + if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) { + pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n"); + return -EINVAL; + } + return 0; +} + +static int dmae_is_idle(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + if (chcr & CHCR_DE) { + if (!(chcr & CHCR_TE)) + return -EBUSY; /* working */ + } + return 0; /* waiting */ +} + +static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; +} + +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) +{ + sh_dmae_writel(sh_chan, hw.sar, SAR); + sh_dmae_writel(sh_chan, hw.dar, DAR); + sh_dmae_writel(sh_chan, + (hw.tcr >> calc_xmit_shift(sh_chan)), TCR); +} + +static void dmae_start(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + chcr |= (CHCR_DE|CHCR_IE); + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +static void dmae_halt(struct sh_dmae_chan *sh_chan) +{ + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE); + sh_dmae_writel(sh_chan, chcr, CHCR); +} + +static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val) +{ + int ret = dmae_is_idle(sh_chan); + /* When DMA was working, can not set data to CHCR */ + if (ret) + return ret; + + sh_dmae_writel(sh_chan, val, CHCR); + return 0; +} + +#define DMARS1_ADDR 0x04 +#define DMARS2_ADDR 0x08 +#define DMARS_SHIFT 8 +#define DMARS_CHAN_MSK 0x01 +static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) +{ + u32 addr; + int shift = 0; + int ret = dmae_is_idle(sh_chan); + if (ret) + return ret; + + if (sh_chan->id & DMARS_CHAN_MSK) + shift = DMARS_SHIFT; + + switch (sh_chan->id) { + /* DMARS0 */ + case 0: + case 1: + addr = SH_DMARS_BASE; + break; + /* DMARS1 */ + case 2: + case 3: + addr = (SH_DMARS_BASE + DMARS1_ADDR); + break; + /* DMARS2 */ + case 4: + case 5: + addr = (SH_DMARS_BASE + DMARS2_ADDR); + break; + default: + return -EINVAL; + } + + ctrl_outw((val << shift) | + (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)), + addr); + + return 0; +} + +static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct sh_desc *desc = tx_to_sh_desc(tx); + struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&sh_chan->desc_lock); + + cookie = sh_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + + /* If desc only in the case of 1 */ + if (desc->async_tx.cookie != -EBUSY) + desc->async_tx.cookie = cookie; + sh_chan->common.cookie = desc->async_tx.cookie; + + list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); + + spin_unlock_bh(&sh_chan->desc_lock); + + return cookie; +} + +static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc, *_desc, *ret = NULL; + + spin_lock_bh(&sh_chan->desc_lock); + list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { + if (async_tx_test_ack(&desc->async_tx)) { + list_del(&desc->node); + ret = desc; + break; + } + } + spin_unlock_bh(&sh_chan->desc_lock); + + return ret; +} + +static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) +{ + if (desc) { + spin_lock_bh(&sh_chan->desc_lock); + + list_splice_init(&desc->tx_list, &sh_chan->ld_free); + list_add(&desc->node, &sh_chan->ld_free); + + spin_unlock_bh(&sh_chan->desc_lock); + } +} + +static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc; + + spin_lock_bh(&sh_chan->desc_lock); + while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { + spin_unlock_bh(&sh_chan->desc_lock); + desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL); + if (!desc) { + spin_lock_bh(&sh_chan->desc_lock); + break; + } + dma_async_tx_descriptor_init(&desc->async_tx, + &sh_chan->common); + desc->async_tx.tx_submit = sh_dmae_tx_submit; + desc->async_tx.flags = DMA_CTRL_ACK; + INIT_LIST_HEAD(&desc->tx_list); + sh_dmae_put_desc(sh_chan, desc); + + spin_lock_bh(&sh_chan->desc_lock); + sh_chan->descs_allocated++; + } + spin_unlock_bh(&sh_chan->desc_lock); + + return sh_chan->descs_allocated; +} + +/* + * sh_dma_free_chan_resources - Free all resources of the channel. + */ +static void sh_dmae_free_chan_resources(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + struct sh_desc *desc, *_desc; + LIST_HEAD(list); + + BUG_ON(!list_empty(&sh_chan->ld_queue)); + spin_lock_bh(&sh_chan->desc_lock); + + list_splice_init(&sh_chan->ld_free, &list); + sh_chan->descs_allocated = 0; + + spin_unlock_bh(&sh_chan->desc_lock); + + list_for_each_entry_safe(desc, _desc, &list, node) + kfree(desc); +} + +static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct sh_dmae_chan *sh_chan; + struct sh_desc *first = NULL, *prev = NULL, *new; + size_t copy_size; + + if (!chan) + return NULL; + + if (!len) + return NULL; + + sh_chan = to_sh_chan(chan); + + do { + /* Allocate the link descriptor from DMA pool */ + new = sh_dmae_get_desc(sh_chan); + if (!new) { + dev_err(sh_chan->dev, + "No free memory for link descriptor\n"); + goto err_get_desc; + } + + copy_size = min(len, (size_t)SH_DMA_TCR_MAX); + + new->hw.sar = dma_src; + new->hw.dar = dma_dest; + new->hw.tcr = copy_size; + if (!first) + first = new; + + new->mark = DESC_NCOMP; + async_tx_ack(&new->async_tx); + + prev = new; + len -= copy_size; + dma_src += copy_size; + dma_dest += copy_size; + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->tx_list); + } while (len); + + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; /* Last desc */ + + return &first->async_tx; + +err_get_desc: + sh_dmae_put_desc(sh_chan, first); + return NULL; + +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function clean up the ld_queue of DMA channel. + */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) +{ + struct sh_desc *desc, *_desc; + + spin_lock_bh(&sh_chan->desc_lock); + list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { + dma_async_tx_callback callback; + void *callback_param; + + /* non send data */ + if (desc->mark == DESC_NCOMP) + break; + + /* send data sesc */ + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + + /* Remove from ld_queue list */ + list_splice_init(&desc->tx_list, &sh_chan->ld_free); + + dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", + desc); + + list_move(&desc->node, &sh_chan->ld_free); + /* Run the link descriptor callback function */ + if (callback) { + spin_unlock_bh(&sh_chan->desc_lock); + dev_dbg(sh_chan->dev, "link descriptor %p callback\n", + desc); + callback(callback_param); + spin_lock_bh(&sh_chan->desc_lock); + } + } + spin_unlock_bh(&sh_chan->desc_lock); +} + +static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) +{ + struct list_head *ld_node; + struct sh_dmae_regs hw; + + /* DMA work check */ + if (dmae_is_idle(sh_chan)) + return; + + /* Find the first un-transfer desciptor */ + for (ld_node = sh_chan->ld_queue.next; + (ld_node != &sh_chan->ld_queue) + && (to_sh_desc(ld_node)->mark == DESC_COMP); + ld_node = ld_node->next) + cpu_relax(); + + if (ld_node != &sh_chan->ld_queue) { + /* Get the ld start address from ld_queue */ + hw = to_sh_desc(ld_node)->hw; + dmae_set_reg(sh_chan, hw); + dmae_start(sh_chan); + } +} + +static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + sh_chan_xfer_ld_queue(sh_chan); +} + +static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct sh_dmae_chan *sh_chan = to_sh_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + sh_dmae_chan_ld_cleanup(sh_chan); + + last_used = chan->cookie; + last_complete = sh_chan->completed_cookie; + if (last_complete == -EBUSY) + last_complete = last_used; + + if (done) + *done = last_complete; + + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static irqreturn_t sh_dmae_interrupt(int irq, void *data) +{ + irqreturn_t ret = IRQ_NONE; + struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; + u32 chcr = sh_dmae_readl(sh_chan, CHCR); + + if (chcr & CHCR_TE) { + /* DMA stop */ + dmae_halt(sh_chan); + + ret = IRQ_HANDLED; + tasklet_schedule(&sh_chan->tasklet); + } + + return ret; +} + +#if defined(CONFIG_CPU_SH4) +static irqreturn_t sh_dmae_err(int irq, void *data) +{ + int err = 0; + struct sh_dmae_device *shdev = (struct sh_dmae_device *)data; + + /* IRQ Multi */ + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + int cnt = 0; + switch (irq) { +#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ) + case DMTE6_IRQ: + cnt++; +#endif + case DMTE0_IRQ: + if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) { + disable_irq(irq); + return IRQ_HANDLED; + } + default: + return IRQ_NONE; + } + } else { + /* reset dma controller */ + err = sh_dmae_rst(0); + if (err) + return err; + if (shdev->pdata.mode & SHDMA_DMAOR1) { + err = sh_dmae_rst(1); + if (err) + return err; + } + disable_irq(irq); + return IRQ_HANDLED; + } +} +#endif + +static void dmae_do_tasklet(unsigned long data) +{ + struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; + struct sh_desc *desc, *_desc, *cur_desc = NULL; + u32 sar_buf = sh_dmae_readl(sh_chan, SAR); + list_for_each_entry_safe(desc, _desc, + &sh_chan->ld_queue, node) { + if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { + cur_desc = desc; + break; + } + } + + if (cur_desc) { + switch (cur_desc->async_tx.cookie) { + case 0: /* other desc data */ + break; + case -EBUSY: /* last desc */ + sh_chan->completed_cookie = + cur_desc->async_tx.cookie; + break; + default: /* first desc ( 0 < )*/ + sh_chan->completed_cookie = + cur_desc->async_tx.cookie - 1; + break; + } + cur_desc->mark = DESC_COMP; + } + /* Next desc */ + sh_chan_xfer_ld_queue(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan); +} + +static unsigned int get_dmae_irq(unsigned int id) +{ + unsigned int irq = 0; + if (id < ARRAY_SIZE(dmte_irq_map)) + irq = dmte_irq_map[id]; + return irq; +} + +static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id) +{ + int err; + unsigned int irq = get_dmae_irq(id); + unsigned long irqflags = IRQF_DISABLED; + struct sh_dmae_chan *new_sh_chan; + + /* alloc channel */ + new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL); + if (!new_sh_chan) { + dev_err(shdev->common.dev, "No free memory for allocating " + "dma channels!\n"); + return -ENOMEM; + } + + new_sh_chan->dev = shdev->common.dev; + new_sh_chan->id = id; + + /* Init DMA tasklet */ + tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet, + (unsigned long)new_sh_chan); + + /* Init the channel */ + dmae_init(new_sh_chan); + + spin_lock_init(&new_sh_chan->desc_lock); + + /* Init descripter manage list */ + INIT_LIST_HEAD(&new_sh_chan->ld_queue); + INIT_LIST_HEAD(&new_sh_chan->ld_free); + + /* copy struct dma_device */ + new_sh_chan->common.device = &shdev->common; + + /* Add the channel to DMA device channel list */ + list_add_tail(&new_sh_chan->common.device_node, + &shdev->common.channels); + shdev->common.chancnt++; + + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + irqflags = IRQF_SHARED; +#if defined(DMTE6_IRQ) + if (irq >= DMTE6_IRQ) + irq = DMTE6_IRQ; + else +#endif + irq = DMTE0_IRQ; + } + + snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id), + "sh-dmae%d", new_sh_chan->id); + + /* set up channel irq */ + err = request_irq(irq, &sh_dmae_interrupt, + irqflags, new_sh_chan->dev_id, new_sh_chan); + if (err) { + dev_err(shdev->common.dev, "DMA channel %d request_irq error " + "with return %d\n", id, err); + goto err_no_irq; + } + + /* CHCR register control function */ + new_sh_chan->set_chcr = dmae_set_chcr; + /* DMARS register control function */ + new_sh_chan->set_dmars = dmae_set_dmars; + + shdev->chan[id] = new_sh_chan; + return 0; + +err_no_irq: + /* remove from dmaengine device node */ + list_del(&new_sh_chan->common.device_node); + kfree(new_sh_chan); + return err; +} + +static void sh_dmae_chan_remove(struct sh_dmae_device *shdev) +{ + int i; + + for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) { + if (shdev->chan[i]) { + struct sh_dmae_chan *shchan = shdev->chan[i]; + if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) + free_irq(dmte_irq_map[i], shchan); + + list_del(&shchan->common.device_node); + kfree(shchan); + shdev->chan[i] = NULL; + } + } + shdev->common.chancnt = 0; +} + +static int __init sh_dmae_probe(struct platform_device *pdev) +{ + int err = 0, cnt, ecnt; + unsigned long irqflags = IRQF_DISABLED; +#if defined(CONFIG_CPU_SH4) + int eirq[] = { DMAE0_IRQ, +#if defined(DMAE1_IRQ) + DMAE1_IRQ +#endif + }; +#endif + struct sh_dmae_device *shdev; + + shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL); + if (!shdev) { + dev_err(&pdev->dev, "No enough memory\n"); + err = -ENOMEM; + goto shdev_err; + } + + /* get platform data */ + if (!pdev->dev.platform_data) + goto shdev_err; + + /* platform data */ + memcpy(&shdev->pdata, pdev->dev.platform_data, + sizeof(struct sh_dmae_pdata)); + + /* reset dma controller */ + err = sh_dmae_rst(0); + if (err) + goto rst_err; + + /* SH7780/85/23 has DMAOR1 */ + if (shdev->pdata.mode & SHDMA_DMAOR1) { + err = sh_dmae_rst(1); + if (err) + goto rst_err; + } + + INIT_LIST_HEAD(&shdev->common.channels); + + dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); + shdev->common.device_alloc_chan_resources + = sh_dmae_alloc_chan_resources; + shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources; + shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy; + shdev->common.device_is_tx_complete = sh_dmae_is_complete; + shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending; + shdev->common.dev = &pdev->dev; + +#if defined(CONFIG_CPU_SH4) + /* Non Mix IRQ mode SH7722/SH7730 etc... */ + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + irqflags = IRQF_SHARED; + eirq[0] = DMTE0_IRQ; +#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ) + eirq[1] = DMTE6_IRQ; +#endif + } + + for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) { + err = request_irq(eirq[ecnt], sh_dmae_err, + irqflags, "DMAC Address Error", shdev); + if (err) { + dev_err(&pdev->dev, "DMA device request_irq" + "error (irq %d) with return %d\n", + eirq[ecnt], err); + goto eirq_err; + } + } +#endif /* CONFIG_CPU_SH4 */ + + /* Create DMA Channel */ + for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) { + err = sh_dmae_chan_probe(shdev, cnt); + if (err) + goto chan_probe_err; + } + + platform_set_drvdata(pdev, shdev); + dma_async_device_register(&shdev->common); + + return err; + +chan_probe_err: + sh_dmae_chan_remove(shdev); + +eirq_err: + for (ecnt-- ; ecnt >= 0; ecnt--) + free_irq(eirq[ecnt], shdev); + +rst_err: + kfree(shdev); + +shdev_err: + return err; +} + +static int __exit sh_dmae_remove(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + + dma_async_device_unregister(&shdev->common); + + if (shdev->pdata.mode & SHDMA_MIX_IRQ) { + free_irq(DMTE0_IRQ, shdev); +#if defined(DMTE6_IRQ) + free_irq(DMTE6_IRQ, shdev); +#endif + } + + /* channel data remove */ + sh_dmae_chan_remove(shdev); + + if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) { + free_irq(DMAE0_IRQ, shdev); +#if defined(DMAE1_IRQ) + free_irq(DMAE1_IRQ, shdev); +#endif + } + kfree(shdev); + + return 0; +} + +static void sh_dmae_shutdown(struct platform_device *pdev) +{ + struct sh_dmae_device *shdev = platform_get_drvdata(pdev); + sh_dmae_ctl_stop(0); + if (shdev->pdata.mode & SHDMA_DMAOR1) + sh_dmae_ctl_stop(1); +} + +static struct platform_driver sh_dmae_driver = { + .remove = __exit_p(sh_dmae_remove), + .shutdown = sh_dmae_shutdown, + .driver = { + .name = "sh-dma-engine", + }, +}; + +static int __init sh_dmae_init(void) +{ + return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe); +} +module_init(sh_dmae_init); + +static void __exit sh_dmae_exit(void) +{ + platform_driver_unregister(&sh_dmae_driver); +} +module_exit(sh_dmae_exit); + +MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>"); +MODULE_DESCRIPTION("Renesas SH DMA Engine driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h new file mode 100644 index 00000000000..2b4bc15a2c0 --- /dev/null +++ b/drivers/dma/shdma.h @@ -0,0 +1,64 @@ +/* + * Renesas SuperH DMA Engine support + * + * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> + * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef __DMA_SHDMA_H +#define __DMA_SHDMA_H + +#include <linux/device.h> +#include <linux/dmapool.h> +#include <linux/dmaengine.h> + +#define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ + +struct sh_dmae_regs { + u32 sar; /* SAR / source address */ + u32 dar; /* DAR / destination address */ + u32 tcr; /* TCR / transfer count */ +}; + +struct sh_desc { + struct list_head tx_list; + struct sh_dmae_regs hw; + struct list_head node; + struct dma_async_tx_descriptor async_tx; + int mark; +}; + +struct sh_dmae_chan { + dma_cookie_t completed_cookie; /* The maximum cookie completed */ + spinlock_t desc_lock; /* Descriptor operation lock */ + struct list_head ld_queue; /* Link descriptors queue */ + struct list_head ld_free; /* Link descriptors free */ + struct dma_chan common; /* DMA common channel */ + struct device *dev; /* Channel device */ + struct tasklet_struct tasklet; /* Tasklet */ + int descs_allocated; /* desc count */ + int id; /* Raw id of this channel */ + char dev_id[16]; /* unique name per DMAC of channel */ + + /* Set chcr */ + int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs); + /* Set DMA resource */ + int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res); +}; + +struct sh_dmae_device { + struct dma_device common; + struct sh_dmae_chan *chan[MAX_DMA_CHANNELS]; + struct sh_dmae_pdata pdata; +}; + +#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common) +#define to_sh_desc(lh) container_of(lh, struct sh_desc, node) +#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx) + +#endif /* __DMA_SHDMA_H */ diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index 7837930146a..fb6bb64e886 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc) static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) { - if (!list_empty(&desc->txd.tx_list)) - desc = list_entry(desc->txd.tx_list.prev, - struct txx9dmac_desc, desc_node); + if (!list_empty(&desc->tx_list)) + desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node); return desc; } @@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc, desc = kzalloc(sizeof(*desc), flags); if (!desc) return NULL; + INIT_LIST_HEAD(&desc->tx_list); dma_async_tx_descriptor_init(&desc->txd, &dc->chan); desc->txd.tx_submit = txx9dmac_tx_submit; /* txd.flags will be overwritten in prep funcs */ @@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc, struct txx9dmac_dev *ddev = dc->ddev; struct txx9dmac_desc *child; - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dma_sync_single_for_cpu(chan2parent(&dc->chan), child->txd.phys, ddev->descsize, DMA_TO_DEVICE); @@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc, txx9dmac_sync_desc_for_cpu(dc, desc); spin_lock_bh(&dc->lock); - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dc->chan), "moving child desc %p to freelist\n", child); - list_splice_init(&desc->txd.tx_list, &dc->free_list); + list_splice_init(&desc->tx_list, &dc->free_list); dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dc->free_list); @@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, param = txd->callback_param; txx9dmac_sync_desc_for_cpu(dc, desc); - list_splice_init(&txd->tx_list, &dc->free_list); + list_splice_init(&desc->tx_list, &dc->free_list); list_move(&desc->desc_node, &dc->free_list); if (!ds) { @@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr) "Bad descriptor submitted for DMA! (cookie: %d)\n", bad_desc->txd.cookie); txx9dmac_dump_desc(dc, &bad_desc->hwdesc); - list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + list_for_each_entry(child, &bad_desc->tx_list, desc_node) txx9dmac_dump_desc(dc, &child->hwdesc); /* Pretend the descriptor completed successfully */ txx9dmac_descriptor_complete(dc, bad_desc); @@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc) return; } - list_for_each_entry(child, &desc->txd.tx_list, desc_node) + list_for_each_entry(child, &desc->tx_list, desc_node) if (desc_read_CHAR(dc, child) == chain) { /* Currently in progress */ if (csr & TXX9_DMA_CSR_ABCHC) @@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dma_sync_single_for_device(chan2parent(&dc->chan), prev->txd.phys, ddev->descsize, DMA_TO_DEVICE); - list_add_tail(&desc->desc_node, - &first->txd.tx_list); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } @@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, prev->txd.phys, ddev->descsize, DMA_TO_DEVICE); - list_add_tail(&desc->desc_node, - &first->txd.tx_list); + list_add_tail(&desc->desc_node, &first->tx_list); } prev = desc; } diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h index c907ff01d27..365d42366b9 100644 --- a/drivers/dma/txx9dmac.h +++ b/drivers/dma/txx9dmac.h @@ -231,6 +231,7 @@ struct txx9dmac_desc { /* THEN values for driver housekeeping */ struct list_head desc_node ____cacheline_aligned; + struct list_head tx_list; struct dma_async_tx_descriptor txd; size_t len; }; diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index a3ca18e2d7c..02127e59fe8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -133,6 +133,13 @@ config EDAC_I3000 Support for error detection and correction on the Intel 3000 and 3010 server chipsets. +config EDAC_I3200 + tristate "Intel 3200" + depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL + help + Support for error detection and correction on the Intel + 3200 and 3210 server chipsets. + config EDAC_X38 tristate "Intel X38" depends on EDAC_MM_EDAC && PCI && X86 @@ -176,11 +183,11 @@ config EDAC_I5100 San Clemente MCH. config EDAC_MPC85XX - tristate "Freescale MPC85xx" - depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx + tristate "Freescale MPC83xx / MPC85xx" + depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx) help Support for error detection and correction on the Freescale - MPC8560, MPC8540, MPC8548 + MPC8349, MPC8560, MPC8540, MPC8548 config EDAC_MV64X60 tristate "Marvell MV64x60" diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index cfa033ce53a..7a473bbe8ab 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o obj-$(CONFIG_EDAC_I3000) += i3000_edac.o +obj-$(CONFIG_EDAC_I3200) += i3200_edac.o obj-$(CONFIG_EDAC_X38) += x38_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o @@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL) += cell_edac.o obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o + diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index 8c54196b5ab..3d50274f134 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -885,14 +885,14 @@ static int __devinit cpc925_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdev->name)) { cpc925_printk(KERN_ERR, "Unable to request mem region\n"); res = -EBUSY; goto err1; } - vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1); + vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!vbase) { cpc925_printk(KERN_ERR, "Unable to ioremap device\n"); res = -ENOMEM; @@ -953,7 +953,7 @@ err3: cpc925_mc_exit(mci); edac_mc_free(mci); err2: - devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1); + devm_release_mem_region(&pdev->dev, r->start, resource_size(r)); err1: devres_release_group(&pdev->dev, cpc925_probe); out: diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index b02a6a69a8f..d5e13c94714 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -356,7 +356,6 @@ static void complete_edac_device_list_del(struct rcu_head *head) edac_dev = container_of(head, struct edac_device_ctl_info, rcu); INIT_LIST_HEAD(&edac_dev->link); - complete(&edac_dev->removal_complete); } /* @@ -369,10 +368,8 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info *edac_device) { list_del_rcu(&edac_device->link); - - init_completion(&edac_device->removal_complete); call_rcu(&edac_device->rcu, complete_edac_device_list_del); - wait_for_completion(&edac_device->removal_complete); + rcu_barrier(); } /* diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 335b7ebdb11..b629c41756f 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -418,16 +418,14 @@ static void complete_mc_list_del(struct rcu_head *head) mci = container_of(head, struct mem_ctl_info, rcu); INIT_LIST_HEAD(&mci->link); - complete(&mci->complete); } static void del_mc_from_global_list(struct mem_ctl_info *mci) { atomic_dec(&edac_handlers); list_del_rcu(&mci->link); - init_completion(&mci->complete); call_rcu(&mci->rcu, complete_mc_list_del); - wait_for_completion(&mci->complete); + rcu_barrier(); } /** diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 30b585b1d60..efb5d565078 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -174,7 +174,6 @@ static void complete_edac_pci_list_del(struct rcu_head *head) pci = container_of(head, struct edac_pci_ctl_info, rcu); INIT_LIST_HEAD(&pci->link); - complete(&pci->complete); } /* @@ -185,9 +184,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head) static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) { list_del_rcu(&pci->link); - init_completion(&pci->complete); call_rcu(&pci->rcu, complete_edac_pci_list_del); - wait_for_completion(&pci->complete); + rcu_barrier(); } #if 0 diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c new file mode 100644 index 00000000000..fde4db91c4d --- /dev/null +++ b/drivers/edac/i3200_edac.c @@ -0,0 +1,527 @@ +/* + * Intel 3200/3210 Memory Controller kernel module + * Copyright (C) 2008-2009 Akamai Technologies, Inc. + * Portions by Hitoshi Mitake <h.mitake@gmail.com>. + * + * This file may be distributed under the terms of the + * GNU General Public License. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/edac.h> +#include <linux/io.h> +#include "edac_core.h" + +#define I3200_REVISION "1.1" + +#define EDAC_MOD_STR "i3200_edac" + +#define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0 + +#define I3200_RANKS 8 +#define I3200_RANKS_PER_CHANNEL 4 +#define I3200_CHANNELS 2 + +/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */ + +#define I3200_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */ +#define I3200_MCHBAR_HIGH 0x4c +#define I3200_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */ +#define I3200_MMR_WINDOW_SIZE 16384 + +#define I3200_TOM 0xa0 /* Top of Memory (16b) + * + * 15:10 reserved + * 9:0 total populated physical memory + */ +#define I3200_TOM_MASK 0x3ff /* bits 9:0 */ +#define I3200_TOM_SHIFT 26 /* 64MiB grain */ + +#define I3200_ERRSTS 0xc8 /* Error Status Register (16b) + * + * 15 reserved + * 14 Isochronous TBWRR Run Behind FIFO Full + * (ITCV) + * 13 Isochronous TBWRR Run Behind FIFO Put + * (ITSTV) + * 12 reserved + * 11 MCH Thermal Sensor Event + * for SMI/SCI/SERR (GTSE) + * 10 reserved + * 9 LOCK to non-DRAM Memory Flag (LCKF) + * 8 reserved + * 7 DRAM Throttle Flag (DTF) + * 6:2 reserved + * 1 Multi-bit DRAM ECC Error Flag (DMERR) + * 0 Single-bit DRAM ECC Error Flag (DSERR) + */ +#define I3200_ERRSTS_UE 0x0002 +#define I3200_ERRSTS_CE 0x0001 +#define I3200_ERRSTS_BITS (I3200_ERRSTS_UE | I3200_ERRSTS_CE) + + +/* Intel MMIO register space - device 0 function 0 - MMR space */ + +#define I3200_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4) + * + * 15:10 reserved + * 9:0 Channel 0 DRAM Rank Boundary Address + */ +#define I3200_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */ +#define I3200_DRB_MASK 0x3ff /* bits 9:0 */ +#define I3200_DRB_SHIFT 26 /* 64MiB grain */ + +#define I3200_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b) + * + * 63:48 Error Column Address (ERRCOL) + * 47:32 Error Row Address (ERRROW) + * 31:29 Error Bank Address (ERRBANK) + * 28:27 Error Rank Address (ERRRANK) + * 26:24 reserved + * 23:16 Error Syndrome (ERRSYND) + * 15: 2 reserved + * 1 Multiple Bit Error Status (MERRSTS) + * 0 Correctable Error Status (CERRSTS) + */ +#define I3200_C1ECCERRLOG 0x680 /* Chan 1 ECC Error Log (64b) */ +#define I3200_ECCERRLOG_CE 0x1 +#define I3200_ECCERRLOG_UE 0x2 +#define I3200_ECCERRLOG_RANK_BITS 0x18000000 +#define I3200_ECCERRLOG_RANK_SHIFT 27 +#define I3200_ECCERRLOG_SYNDROME_BITS 0xff0000 +#define I3200_ECCERRLOG_SYNDROME_SHIFT 16 +#define I3200_CAPID0 0xe0 /* P.95 of spec for details */ + +struct i3200_priv { + void __iomem *window; +}; + +static int nr_channels; + +static int how_many_channels(struct pci_dev *pdev) +{ + unsigned char capid0_8b; /* 8th byte of CAPID0 */ + + pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ + debugf0("In single channel mode.\n"); + return 1; + } else { + debugf0("In dual channel mode.\n"); + return 2; + } +} + +static unsigned long eccerrlog_syndrome(u64 log) +{ + return (log & I3200_ECCERRLOG_SYNDROME_BITS) >> + I3200_ECCERRLOG_SYNDROME_SHIFT; +} + +static int eccerrlog_row(int channel, u64 log) +{ + u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >> + I3200_ECCERRLOG_RANK_SHIFT); + return rank | (channel * I3200_RANKS_PER_CHANNEL); +} + +enum i3200_chips { + I3200 = 0, +}; + +struct i3200_dev_info { + const char *ctl_name; +}; + +struct i3200_error_info { + u16 errsts; + u16 errsts2; + u64 eccerrlog[I3200_CHANNELS]; +}; + +static const struct i3200_dev_info i3200_devs[] = { + [I3200] = { + .ctl_name = "i3200" + }, +}; + +static struct pci_dev *mci_pdev; +static int i3200_registered = 1; + + +static void i3200_clear_error_info(struct mem_ctl_info *mci) +{ + struct pci_dev *pdev; + + pdev = to_pci_dev(mci->dev); + + /* + * Clear any error bits. + * (Yes, we really clear bits by writing 1 to them.) + */ + pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS, + I3200_ERRSTS_BITS); +} + +static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci, + struct i3200_error_info *info) +{ + struct pci_dev *pdev; + struct i3200_priv *priv = mci->pvt_info; + void __iomem *window = priv->window; + + pdev = to_pci_dev(mci->dev); + + /* + * This is a mess because there is no atomic way to read all the + * registers at once and the registers can transition from CE being + * overwritten by UE. + */ + pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts); + if (!(info->errsts & I3200_ERRSTS_BITS)) + return; + + info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG); + if (nr_channels == 2) + info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG); + + pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2); + + /* + * If the error is the same for both reads then the first set + * of reads is valid. If there is a change then there is a CE + * with no info and the second set of reads is valid and + * should be UE info. + */ + if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { + info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG); + if (nr_channels == 2) + info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG); + } + + i3200_clear_error_info(mci); +} + +static void i3200_process_error_info(struct mem_ctl_info *mci, + struct i3200_error_info *info) +{ + int channel; + u64 log; + + if (!(info->errsts & I3200_ERRSTS_BITS)) + return; + + if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { + edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); + info->errsts = info->errsts2; + } + + for (channel = 0; channel < nr_channels; channel++) { + log = info->eccerrlog[channel]; + if (log & I3200_ECCERRLOG_UE) { + edac_mc_handle_ue(mci, 0, 0, + eccerrlog_row(channel, log), + "i3200 UE"); + } else if (log & I3200_ECCERRLOG_CE) { + edac_mc_handle_ce(mci, 0, 0, + eccerrlog_syndrome(log), + eccerrlog_row(channel, log), 0, + "i3200 CE"); + } + } +} + +static void i3200_check(struct mem_ctl_info *mci) +{ + struct i3200_error_info info; + + debugf1("MC%d: %s()\n", mci->mc_idx, __func__); + i3200_get_and_clear_error_info(mci, &info); + i3200_process_error_info(mci, &info); +} + + +void __iomem *i3200_map_mchbar(struct pci_dev *pdev) +{ + union { + u64 mchbar; + struct { + u32 mchbar_low; + u32 mchbar_high; + }; + } u; + void __iomem *window; + + pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low); + pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high); + u.mchbar &= I3200_MCHBAR_MASK; + + if (u.mchbar != (resource_size_t)u.mchbar) { + printk(KERN_ERR + "i3200: mmio space beyond accessible range (0x%llx)\n", + (unsigned long long)u.mchbar); + return NULL; + } + + window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE); + if (!window) + printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n", + (unsigned long long)u.mchbar); + + return window; +} + + +static void i3200_get_drbs(void __iomem *window, + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) +{ + int i; + + for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { + drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; + drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + } +} + +static bool i3200_is_stacked(struct pci_dev *pdev, + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) +{ + u16 tom; + + pci_read_config_word(pdev, I3200_TOM, &tom); + tom &= I3200_TOM_MASK; + + return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom; +} + +static unsigned long drb_to_nr_pages( + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked, + int channel, int rank) +{ + int n; + + n = drbs[channel][rank]; + if (rank > 0) + n -= drbs[channel][rank - 1]; + if (stacked && (channel == 1) && + drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1]) + n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1]; + + n <<= (I3200_DRB_SHIFT - PAGE_SHIFT); + return n; +} + +static int i3200_probe1(struct pci_dev *pdev, int dev_idx) +{ + int rc; + int i; + struct mem_ctl_info *mci = NULL; + unsigned long last_page; + u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]; + bool stacked; + void __iomem *window; + struct i3200_priv *priv; + + debugf0("MC: %s()\n", __func__); + + window = i3200_map_mchbar(pdev); + if (!window) + return -ENODEV; + + i3200_get_drbs(window, drbs); + nr_channels = how_many_channels(pdev); + + mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS, + nr_channels, 0); + if (!mci) + return -ENOMEM; + + debugf3("MC: %s(): init mci\n", __func__); + + mci->dev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR2; + + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + + mci->mod_name = EDAC_MOD_STR; + mci->mod_ver = I3200_REVISION; + mci->ctl_name = i3200_devs[dev_idx].ctl_name; + mci->dev_name = pci_name(pdev); + mci->edac_check = i3200_check; + mci->ctl_page_to_phys = NULL; + priv = mci->pvt_info; + priv->window = window; + + stacked = i3200_is_stacked(pdev, drbs); + + /* + * The dram rank boundary (DRB) reg values are boundary addresses + * for each DRAM rank with a granularity of 64MB. DRB regs are + * cumulative; the last one will contain the total memory + * contained in all ranks. + */ + last_page = -1UL; + for (i = 0; i < mci->nr_csrows; i++) { + unsigned long nr_pages; + struct csrow_info *csrow = &mci->csrows[i]; + + nr_pages = drb_to_nr_pages(drbs, stacked, + i / I3200_RANKS_PER_CHANNEL, + i % I3200_RANKS_PER_CHANNEL); + + if (nr_pages == 0) { + csrow->mtype = MEM_EMPTY; + continue; + } + + csrow->first_page = last_page + 1; + last_page += nr_pages; + csrow->last_page = last_page; + csrow->nr_pages = nr_pages; + + csrow->grain = nr_pages << PAGE_SHIFT; + csrow->mtype = MEM_DDR2; + csrow->dtype = DEV_UNKNOWN; + csrow->edac_mode = EDAC_UNKNOWN; + } + + i3200_clear_error_info(mci); + + rc = -ENODEV; + if (edac_mc_add_mc(mci)) { + debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__); + goto fail; + } + + /* get this far and it's successful */ + debugf3("MC: %s(): success\n", __func__); + return 0; + +fail: + iounmap(window); + if (mci) + edac_mc_free(mci); + + return rc; +} + +static int __devinit i3200_init_one(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + + debugf0("MC: %s()\n", __func__); + + if (pci_enable_device(pdev) < 0) + return -EIO; + + rc = i3200_probe1(pdev, ent->driver_data); + if (!mci_pdev) + mci_pdev = pci_dev_get(pdev); + + return rc; +} + +static void __devexit i3200_remove_one(struct pci_dev *pdev) +{ + struct mem_ctl_info *mci; + struct i3200_priv *priv; + + debugf0("%s()\n", __func__); + + mci = edac_mc_del_mc(&pdev->dev); + if (!mci) + return; + + priv = mci->pvt_info; + iounmap(priv->window); + + edac_mc_free(mci); +} + +static const struct pci_device_id i3200_pci_tbl[] __devinitdata = { + { + PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I3200}, + { + 0, + } /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, i3200_pci_tbl); + +static struct pci_driver i3200_driver = { + .name = EDAC_MOD_STR, + .probe = i3200_init_one, + .remove = __devexit_p(i3200_remove_one), + .id_table = i3200_pci_tbl, +}; + +static int __init i3200_init(void) +{ + int pci_rc; + + debugf3("MC: %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + pci_rc = pci_register_driver(&i3200_driver); + if (pci_rc < 0) + goto fail0; + + if (!mci_pdev) { + i3200_registered = 0; + mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_3200_HB, NULL); + if (!mci_pdev) { + debugf0("i3200 pci_get_device fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + + pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl); + if (pci_rc < 0) { + debugf0("i3200 init fail\n"); + pci_rc = -ENODEV; + goto fail1; + } + } + + return 0; + +fail1: + pci_unregister_driver(&i3200_driver); + +fail0: + if (mci_pdev) + pci_dev_put(mci_pdev); + + return pci_rc; +} + +static void __exit i3200_exit(void) +{ + debugf3("MC: %s()\n", __func__); + + pci_unregister_driver(&i3200_driver); + if (!i3200_registered) { + i3200_remove_one(mci_pdev); + pci_dev_put(mci_pdev); + } +} + +module_init(i3200_init); +module_exit(i3200_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Akamai Technologies, Inc."); +MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 3f2ccfc6407..157f6504f25 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -41,7 +41,9 @@ static u32 orig_pci_err_en; #endif static u32 orig_l2_err_disable; +#ifdef CONFIG_MPC85xx static u32 orig_hid1[2]; +#endif /************************ MC SYSFS parts ***********************************/ @@ -646,6 +648,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = { { .compatible = "fsl,mpc8560-l2-cache-controller", }, { .compatible = "fsl,mpc8568-l2-cache-controller", }, { .compatible = "fsl,mpc8572-l2-cache-controller", }, + { .compatible = "fsl,p2020-l2-cache-controller", }, {}, }; @@ -788,19 +791,20 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) csrow = &mci->csrows[index]; cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 + (index * MPC85XX_MC_CS_BNDS_OFS)); - start = (cs_bnds & 0xfff0000) << 4; - end = ((cs_bnds & 0xfff) << 20); - if (start) - start |= 0xfffff; - if (end) - end |= 0xfffff; + + start = (cs_bnds & 0xffff0000) >> 16; + end = (cs_bnds & 0x0000ffff); if (start == end) continue; /* not populated */ + start <<= (24 - PAGE_SHIFT); + end <<= (24 - PAGE_SHIFT); + end |= (1 << (24 - PAGE_SHIFT)) - 1; + csrow->first_page = start >> PAGE_SHIFT; csrow->last_page = end >> PAGE_SHIFT; - csrow->nr_pages = csrow->last_page + 1 - csrow->first_page; + csrow->nr_pages = end + 1 - start; csrow->grain = 8; csrow->mtype = mtype; csrow->dtype = DEV_UNKNOWN; @@ -984,6 +988,8 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = { { .compatible = "fsl,mpc8560-memory-controller", }, { .compatible = "fsl,mpc8568-memory-controller", }, { .compatible = "fsl,mpc8572-memory-controller", }, + { .compatible = "fsl,mpc8349-memory-controller", }, + { .compatible = "fsl,p2020-memory-controller", }, {}, }; @@ -999,13 +1005,13 @@ static struct of_platform_driver mpc85xx_mc_err_driver = { }, }; - +#ifdef CONFIG_MPC85xx static void __init mpc85xx_mc_clear_rfxe(void *data) { orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1); mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000)); } - +#endif static int __init mpc85xx_mc_init(void) { @@ -1038,26 +1044,32 @@ static int __init mpc85xx_mc_init(void) printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); #endif +#ifdef CONFIG_MPC85xx /* * need to clear HID1[RFXE] to disable machine check int * so we can catch it */ if (edac_op_state == EDAC_OPSTATE_INT) on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0); +#endif return 0; } module_init(mpc85xx_mc_init); +#ifdef CONFIG_MPC85xx static void __exit mpc85xx_mc_restore_hid1(void *data) { mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]); } +#endif static void __exit mpc85xx_mc_exit(void) { +#ifdef CONFIG_MPC85xx on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); +#endif #ifdef CONFIG_PCI of_unregister_platform_driver(&mpc85xx_pci_err_driver); #endif diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 5131aaae8e0..a6b9fec13a7 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -90,7 +90,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev) return -ENOENT; } - pci_serr = ioremap(r->start, r->end - r->start + 1); + pci_serr = ioremap(r->start, resource_size(r)); if (!pci_serr) return -ENOMEM; @@ -140,7 +140,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -150,7 +150,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->pci_vbase) { printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); res = -ENOMEM; @@ -306,7 +306,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while request mem region\n", __func__); @@ -316,7 +316,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) pdata->sram_vbase = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->sram_vbase) { printk(KERN_ERR "%s: Unable to setup SRAM err regs\n", __func__); @@ -474,7 +474,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -484,7 +484,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->cpu_vbase[0]) { printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); res = -ENOMEM; @@ -501,7 +501,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -511,7 +511,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->cpu_vbase[1]) { printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); res = -ENOMEM; @@ -726,7 +726,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) if (!devm_request_mem_region(&pdev->dev, r->start, - r->end - r->start + 1, + resource_size(r), pdata->name)) { printk(KERN_ERR "%s: Error while requesting mem region\n", __func__); @@ -736,7 +736,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) pdata->mc_vbase = devm_ioremap(&pdev->dev, r->start, - r->end - r->start + 1); + resource_size(r)); if (!pdata->mc_vbase) { printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__); res = -ENOMEM; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index e4d971c8b9d..f831ea15929 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -102,6 +102,7 @@ config DRM_I915 select BACKLIGHT_CLASS_DEVICE if ACPI select INPUT if ACPI select ACPI_VIDEO if ACPI + select ACPI_BUTTON if ACPI help Choose this option if you have a system that has Intel 830M, 845G, 852GM, 855GM 865G or 915G integrated graphics. If M is selected, the diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 230c9ffdd5e..80391995bde 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -142,6 +142,19 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size) if (IS_ERR(obj->filp)) goto free; + /* Basically we want to disable the OOM killer and handle ENOMEM + * ourselves by sacrificing pages from cached buffers. + * XXX shmem_file_[gs]et_gfp_mask() + */ + mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, + GFP_HIGHUSER | + __GFP_COLD | + __GFP_FS | + __GFP_RECLAIMABLE | + __GFP_NORETRY | + __GFP_NOWARN | + __GFP_NOMEMALLOC); + kref_init(&obj->refcount); kref_init(&obj->handlecount); obj->size = size; diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5269dfa5f62..fa7b9be096b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \ i915_gem.o \ i915_gem_debug.o \ i915_gem_tiling.o \ + i915_trace_points.o \ intel_display.o \ intel_crt.o \ intel_lvds.o \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1e3bdcee863..f8ce9a3a420 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -96,11 +96,13 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data) { struct drm_gem_object *obj = obj_priv->obj; - seq_printf(m, " %p: %s %08x %08x %d", + seq_printf(m, " %p: %s %8zd %08x %08x %d %s", obj, get_pin_flag(obj_priv), + obj->size, obj->read_domains, obj->write_domain, - obj_priv->last_rendering_seqno); + obj_priv->last_rendering_seqno, + obj_priv->dirty ? "dirty" : ""); if (obj->name) seq_printf(m, " (name: %d)", obj->name); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 5a49a1867b3..45d507ebd3f 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -33,6 +33,7 @@ #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include <linux/vgaarb.h> /* Really want an OS-independent resettable timer. Would like to have @@ -50,14 +51,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; int i; + trace_i915_ring_wait_begin (dev); + for (i = 0; i < 100000; i++) { ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; acthd = I915_READ(acthd_reg); ring->space = ring->head - (ring->tail + 8); if (ring->space < 0) ring->space += ring->Size; - if (ring->space >= n) + if (ring->space >= n) { + trace_i915_ring_wait_end (dev); return 0; + } if (dev->primary->master) { struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; @@ -77,6 +82,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) } + trace_i915_ring_wait_end (dev); return -EBUSY; } @@ -922,7 +928,8 @@ static int i915_get_bridge_dev(struct drm_device *dev) * how much was set aside so we can use it for our own purposes. */ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, - uint32_t *preallocated_size) + uint32_t *preallocated_size, + uint32_t *start) { struct drm_i915_private *dev_priv = dev->dev_private; u16 tmp = 0; @@ -1009,10 +1016,159 @@ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size, return -1; } *preallocated_size = stolen - overhead; + *start = overhead; return 0; } +#define PTE_ADDRESS_MASK 0xfffff000 +#define PTE_ADDRESS_MASK_HIGH 0x000000f0 /* i915+ */ +#define PTE_MAPPING_TYPE_UNCACHED (0 << 1) +#define PTE_MAPPING_TYPE_DCACHE (1 << 1) /* i830 only */ +#define PTE_MAPPING_TYPE_CACHED (3 << 1) +#define PTE_MAPPING_TYPE_MASK (3 << 1) +#define PTE_VALID (1 << 0) + +/** + * i915_gtt_to_phys - take a GTT address and turn it into a physical one + * @dev: drm device + * @gtt_addr: address to translate + * + * Some chip functions require allocations from stolen space but need the + * physical address of the memory in question. We use this routine + * to get a physical address suitable for register programming from a given + * GTT address. + */ +static unsigned long i915_gtt_to_phys(struct drm_device *dev, + unsigned long gtt_addr) +{ + unsigned long *gtt; + unsigned long entry, phys; + int gtt_bar = IS_I9XX(dev) ? 0 : 1; + int gtt_offset, gtt_size; + + if (IS_I965G(dev)) { + if (IS_G4X(dev) || IS_IGDNG(dev)) { + gtt_offset = 2*1024*1024; + gtt_size = 2*1024*1024; + } else { + gtt_offset = 512*1024; + gtt_size = 512*1024; + } + } else { + gtt_bar = 3; + gtt_offset = 0; + gtt_size = pci_resource_len(dev->pdev, gtt_bar); + } + + gtt = ioremap_wc(pci_resource_start(dev->pdev, gtt_bar) + gtt_offset, + gtt_size); + if (!gtt) { + DRM_ERROR("ioremap of GTT failed\n"); + return 0; + } + + entry = *(volatile u32 *)(gtt + (gtt_addr / 1024)); + + DRM_DEBUG("GTT addr: 0x%08lx, PTE: 0x%08lx\n", gtt_addr, entry); + + /* Mask out these reserved bits on this hardware. */ + if (!IS_I9XX(dev) || IS_I915G(dev) || IS_I915GM(dev) || + IS_I945G(dev) || IS_I945GM(dev)) { + entry &= ~PTE_ADDRESS_MASK_HIGH; + } + + /* If it's not a mapping type we know, then bail. */ + if ((entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_UNCACHED && + (entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_CACHED) { + iounmap(gtt); + return 0; + } + + if (!(entry & PTE_VALID)) { + DRM_ERROR("bad GTT entry in stolen space\n"); + iounmap(gtt); + return 0; + } + + iounmap(gtt); + + phys =(entry & PTE_ADDRESS_MASK) | + ((uint64_t)(entry & PTE_ADDRESS_MASK_HIGH) << (32 - 4)); + + DRM_DEBUG("GTT addr: 0x%08lx, phys addr: 0x%08lx\n", gtt_addr, phys); + + return phys; +} + +static void i915_warn_stolen(struct drm_device *dev) +{ + DRM_ERROR("not enough stolen space for compressed buffer, disabling\n"); + DRM_ERROR("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n"); +} + +static void i915_setup_compression(struct drm_device *dev, int size) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_mm_node *compressed_fb, *compressed_llb; + unsigned long cfb_base, ll_base; + + /* Leave 1M for line length buffer & misc. */ + compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0); + if (!compressed_fb) { + i915_warn_stolen(dev); + return; + } + + compressed_fb = drm_mm_get_block(compressed_fb, size, 4096); + if (!compressed_fb) { + i915_warn_stolen(dev); + return; + } + + cfb_base = i915_gtt_to_phys(dev, compressed_fb->start); + if (!cfb_base) { + DRM_ERROR("failed to get stolen phys addr, disabling FBC\n"); + drm_mm_put_block(compressed_fb); + } + + if (!IS_GM45(dev)) { + compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096, + 4096, 0); + if (!compressed_llb) { + i915_warn_stolen(dev); + return; + } + + compressed_llb = drm_mm_get_block(compressed_llb, 4096, 4096); + if (!compressed_llb) { + i915_warn_stolen(dev); + return; + } + + ll_base = i915_gtt_to_phys(dev, compressed_llb->start); + if (!ll_base) { + DRM_ERROR("failed to get stolen phys addr, disabling FBC\n"); + drm_mm_put_block(compressed_fb); + drm_mm_put_block(compressed_llb); + } + } + + dev_priv->cfb_size = size; + + if (IS_GM45(dev)) { + g4x_disable_fbc(dev); + I915_WRITE(DPFC_CB_BASE, compressed_fb->start); + } else { + i8xx_disable_fbc(dev); + I915_WRITE(FBC_CFB_BASE, cfb_base); + I915_WRITE(FBC_LL_BASE, ll_base); + } + + DRM_DEBUG("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n", cfb_base, + ll_base, size >> 20); +} + /* true = enable decode, false = disable decoder */ static unsigned int i915_vga_set_decode(void *cookie, bool state) { @@ -1027,6 +1183,7 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state) } static int i915_load_modeset_init(struct drm_device *dev, + unsigned long prealloc_start, unsigned long prealloc_size, unsigned long agp_size) { @@ -1047,6 +1204,10 @@ static int i915_load_modeset_init(struct drm_device *dev, /* Basic memrange allocator for stolen space (aka vram) */ drm_mm_init(&dev_priv->vram, 0, prealloc_size); + DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024)); + + /* We're off and running w/KMS */ + dev_priv->mm.suspended = 0; /* Let GEM Manage from end of prealloc space to end of aperture. * @@ -1059,10 +1220,25 @@ static int i915_load_modeset_init(struct drm_device *dev, */ i915_gem_do_init(dev, prealloc_size, agp_size - 4096); + mutex_lock(&dev->struct_mutex); ret = i915_gem_init_ringbuffer(dev); + mutex_unlock(&dev->struct_mutex); if (ret) goto out; + /* Try to set up FBC with a reasonable compressed buffer size */ + if (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev) || IS_GM45(dev)) && + i915_powersave) { + int cfb_size; + + /* Try to get an 8M buffer... */ + if (prealloc_size > (9*1024*1024)) + cfb_size = 8*1024*1024; + else /* fall back to 7/8 of the stolen space */ + cfb_size = prealloc_size * 7 / 8; + i915_setup_compression(dev, cfb_size); + } + /* Allow hardware batchbuffers unless told otherwise. */ dev_priv->allow_batchbuffer = 1; @@ -1180,7 +1356,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) struct drm_i915_private *dev_priv = dev->dev_private; resource_size_t base, size; int ret = 0, mmio_bar = IS_I9XX(dev) ? 0 : 1; - uint32_t agp_size, prealloc_size; + uint32_t agp_size, prealloc_size, prealloc_start; /* i915 has 4 more counters */ dev->counters += 4; @@ -1234,7 +1410,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) "performance may suffer.\n"); } - ret = i915_probe_agp(dev, &agp_size, &prealloc_size); + ret = i915_probe_agp(dev, &agp_size, &prealloc_size, &prealloc_start); if (ret) goto out_iomapfree; @@ -1300,8 +1476,12 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) return ret; } + /* Start out suspended */ + dev_priv->mm.suspended = 1; + if (drm_core_check_feature(dev, DRIVER_MODESET)) { - ret = i915_load_modeset_init(dev, prealloc_size, agp_size); + ret = i915_load_modeset_init(dev, prealloc_start, + prealloc_size, agp_size); if (ret < 0) { DRM_ERROR("failed to init modeset\n"); goto out_workqueue_free; @@ -1313,6 +1493,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_IGDNG(dev)) intel_opregion_init(dev, 0); + setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed, + (unsigned long) dev); return 0; out_workqueue_free: @@ -1333,6 +1515,7 @@ int i915_driver_unload(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; destroy_workqueue(dev_priv->wq); + del_timer_sync(&dev_priv->hangcheck_timer); io_mapping_free(dev_priv->mm.gtt_mapping); if (dev_priv->mm.gtt_mtrr >= 0) { @@ -1472,6 +1655,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0), DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0), DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0), + DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index dbe568c9327..b93814c0d3e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -89,6 +89,8 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state) pci_set_power_state(dev->pdev, PCI_D3hot); } + dev_priv->suspended = 1; + return 0; } @@ -97,8 +99,6 @@ static int i915_resume(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; - pci_set_power_state(dev->pdev, PCI_D0); - pci_restore_state(dev->pdev); if (pci_enable_device(dev->pdev)) return -1; pci_set_master(dev->pdev); @@ -124,9 +124,135 @@ static int i915_resume(struct drm_device *dev) drm_helper_resume_force_mode(dev); } + dev_priv->suspended = 0; + return ret; } +/** + * i965_reset - reset chip after a hang + * @dev: drm device to reset + * @flags: reset domains + * + * Reset the chip. Useful if a hang is detected. Returns zero on successful + * reset or otherwise an error code. + * + * Procedure is fairly simple: + * - reset the chip using the reset reg + * - re-init context state + * - re-init hardware status page + * - re-init ring buffer + * - re-init interrupt state + * - re-init display + */ +int i965_reset(struct drm_device *dev, u8 flags) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + unsigned long timeout; + u8 gdrst; + /* + * We really should only reset the display subsystem if we actually + * need to + */ + bool need_display = true; + + mutex_lock(&dev->struct_mutex); + + /* + * Clear request list + */ + i915_gem_retire_requests(dev); + + if (need_display) + i915_save_display(dev); + + if (IS_I965G(dev) || IS_G4X(dev)) { + /* + * Set the domains we want to reset, then the reset bit (bit 0). + * Clear the reset bit after a while and wait for hardware status + * bit (bit 1) to be set + */ + pci_read_config_byte(dev->pdev, GDRST, &gdrst); + pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0)); + udelay(50); + pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe); + + /* ...we don't want to loop forever though, 500ms should be plenty */ + timeout = jiffies + msecs_to_jiffies(500); + do { + udelay(100); + pci_read_config_byte(dev->pdev, GDRST, &gdrst); + } while ((gdrst & 0x1) && time_after(timeout, jiffies)); + + if (gdrst & 0x1) { + WARN(true, "i915: Failed to reset chip\n"); + mutex_unlock(&dev->struct_mutex); + return -EIO; + } + } else { + DRM_ERROR("Error occurred. Don't know how to reset this chip.\n"); + return -ENODEV; + } + + /* Ok, now get things going again... */ + + /* + * Everything depends on having the GTT running, so we need to start + * there. Fortunately we don't need to do this unless we reset the + * chip at a PCI level. + * + * Next we need to restore the context, but we don't use those + * yet either... + * + * Ring buffer needs to be re-initialized in the KMS case, or if X + * was running at the time of the reset (i.e. we weren't VT + * switched away). + */ + if (drm_core_check_feature(dev, DRIVER_MODESET) || + !dev_priv->mm.suspended) { + drm_i915_ring_buffer_t *ring = &dev_priv->ring; + struct drm_gem_object *obj = ring->ring_obj; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + dev_priv->mm.suspended = 0; + + /* Stop the ring if it's running. */ + I915_WRITE(PRB0_CTL, 0); + I915_WRITE(PRB0_TAIL, 0); + I915_WRITE(PRB0_HEAD, 0); + + /* Initialize the ring. */ + I915_WRITE(PRB0_START, obj_priv->gtt_offset); + I915_WRITE(PRB0_CTL, + ((obj->size - 4096) & RING_NR_PAGES) | + RING_NO_REPORT | + RING_VALID); + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + i915_kernel_lost_context(dev); + else { + ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; + ring->space = ring->head - (ring->tail + 8); + if (ring->space < 0) + ring->space += ring->Size; + } + + mutex_unlock(&dev->struct_mutex); + drm_irq_uninstall(dev); + drm_irq_install(dev); + mutex_lock(&dev->struct_mutex); + } + + /* + * Display needs restore too... + */ + if (need_display) + i915_restore_display(dev); + + mutex_unlock(&dev->struct_mutex); + return 0; +} + + static int __devinit i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -234,6 +360,8 @@ static int __init i915_init(void) { driver.num_ioctls = i915_max_ioctl; + i915_gem_shrinker_init(); + /* * If CONFIG_DRM_I915_KMS is set, default to KMS unless * explicitly disabled with the module pararmeter. @@ -260,6 +388,7 @@ static int __init i915_init(void) static void __exit i915_exit(void) { + i915_gem_shrinker_exit(); drm_exit(&driver); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a0632f8e76a..b24b2d145b7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -48,6 +48,11 @@ enum pipe { PIPE_B, }; +enum plane { + PLANE_A = 0, + PLANE_B, +}; + #define I915_NUM_PIPE 2 /* Interface history: @@ -148,6 +153,23 @@ struct drm_i915_error_state { struct timeval time; }; +struct drm_i915_display_funcs { + void (*dpms)(struct drm_crtc *crtc, int mode); + bool (*fbc_enabled)(struct drm_crtc *crtc); + void (*enable_fbc)(struct drm_crtc *crtc, unsigned long interval); + void (*disable_fbc)(struct drm_device *dev); + int (*get_display_clock_speed)(struct drm_device *dev); + int (*get_fifo_size)(struct drm_device *dev, int plane); + void (*update_wm)(struct drm_device *dev, int planea_clock, + int planeb_clock, int sr_hdisplay, int pixel_size); + /* clock updates for mode set */ + /* cursor updates */ + /* render clock increase/decrease */ + /* display clock increase/decrease */ + /* pll clock increase/decrease */ + /* clock gating init */ +}; + typedef struct drm_i915_private { struct drm_device *dev; @@ -198,10 +220,21 @@ typedef struct drm_i915_private { unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds; int vblank_pipe; + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */ + struct timer_list hangcheck_timer; + int hangcheck_count; + uint32_t last_acthd; + bool cursor_needs_physical; struct drm_mm vram; + unsigned long cfb_size; + unsigned long cfb_pitch; + int cfb_fence; + int cfb_plane; + int irq_enabled; struct intel_opregion opregion; @@ -222,6 +255,8 @@ typedef struct drm_i915_private { unsigned int edp_support:1; int lvds_ssc_freq; + struct notifier_block lid_notifier; + int crt_ddc_bus; /* -1 = unknown, else GPIO to use for CRT DDC */ struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */ int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */ @@ -234,7 +269,11 @@ typedef struct drm_i915_private { struct work_struct error_work; struct workqueue_struct *wq; + /* Display functions */ + struct drm_i915_display_funcs display; + /* Register state */ + bool suspended; u8 saveLBB; u32 saveDSPACNTR; u32 saveDSPBCNTR; @@ -350,6 +389,15 @@ typedef struct drm_i915_private { int gtt_mtrr; /** + * Membership on list of all loaded devices, used to evict + * inactive buffers under memory pressure. + * + * Modifications should only be done whilst holding the + * shrink_list_lock spinlock. + */ + struct list_head shrink_list; + + /** * List of objects currently involved in rendering from the * ringbuffer. * @@ -432,7 +480,7 @@ typedef struct drm_i915_private { * It prevents command submission from occuring and makes * every pending request fail */ - int wedged; + atomic_t wedged; /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; @@ -491,10 +539,7 @@ struct drm_i915_gem_object { * This is the same as gtt_space->start */ uint32_t gtt_offset; - /** - * Required alignment for the object - */ - uint32_t gtt_alignment; + /** * Fake offset for use by mmap(2) */ @@ -541,6 +586,11 @@ struct drm_i915_gem_object { * in an execbuffer object list. */ int in_execbuffer; + + /** + * Advice: are the backing pages purgeable? + */ + int madv; }; /** @@ -585,6 +635,8 @@ extern int i915_max_ioctl; extern unsigned int i915_fbpercrtc; extern unsigned int i915_powersave; +extern void i915_save_display(struct drm_device *dev); +extern void i915_restore_display(struct drm_device *dev); extern int i915_master_create(struct drm_device *dev, struct drm_master *master); extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master); @@ -604,8 +656,10 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, extern int i915_emit_box(struct drm_device *dev, struct drm_clip_rect *boxes, int i, int DR1, int DR4); +extern int i965_reset(struct drm_device *dev, u8 flags); /* i915_irq.c */ +void i915_hangcheck_elapsed(unsigned long data); extern int i915_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int i915_irq_wait(struct drm_device *dev, void *data, @@ -676,6 +730,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, @@ -695,6 +751,7 @@ int i915_gem_object_unbind(struct drm_gem_object *obj); void i915_gem_release_mmap(struct drm_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); uint32_t i915_get_gem_seqno(struct drm_device *dev); +bool i915_seqno_passed(uint32_t seq1, uint32_t seq2); int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); int i915_gem_object_put_fence_reg(struct drm_gem_object *obj); void i915_gem_retire_requests(struct drm_device *dev); @@ -720,6 +777,9 @@ int i915_gem_object_get_pages(struct drm_gem_object *obj); void i915_gem_object_put_pages(struct drm_gem_object *obj); void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv); +void i915_gem_shrinker_init(void); +void i915_gem_shrinker_exit(void); + /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); @@ -767,6 +827,8 @@ static inline void opregion_enable_asle(struct drm_device *dev) { return; } extern void intel_modeset_init(struct drm_device *dev); extern void intel_modeset_cleanup(struct drm_device *dev); extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state); +extern void i8xx_disable_fbc(struct drm_device *dev); +extern void g4x_disable_fbc(struct drm_device *dev); /** * Lock test for when it's just for synchronization of ring access. @@ -864,6 +926,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); (dev)->pci_device == 0x2E12 || \ (dev)->pci_device == 0x2E22 || \ (dev)->pci_device == 0x2E32 || \ + (dev)->pci_device == 0x2E42 || \ (dev)->pci_device == 0x0042 || \ (dev)->pci_device == 0x0046) @@ -876,6 +939,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); (dev)->pci_device == 0x2E12 || \ (dev)->pci_device == 0x2E22 || \ (dev)->pci_device == 0x2E32 || \ + (dev)->pci_device == 0x2E42 || \ IS_GM45(dev)) #define IS_IGDG(dev) ((dev)->pci_device == 0xa001) @@ -909,12 +973,13 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define SUPPORTS_EDP(dev) (IS_IGDNG_M(dev)) -#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev)) +#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev) || IS_I965G(dev)) /* dsparb controlled by hw only */ #define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev)) #define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev)) #define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev)) +#define I915_HAS_FBC(dev) (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev))) #define PRIMARY_RINGBUFFER_SIZE (128*1024) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c67317112f4..40727d4c291 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,6 +29,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #include <linux/swap.h> #include <linux/pci.h> @@ -48,11 +49,15 @@ static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); -static int i915_gem_evict_something(struct drm_device *dev); +static int i915_gem_evict_something(struct drm_device *dev, int min_size); +static int i915_gem_evict_from_inactive_list(struct drm_device *dev); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file_priv); +static LIST_HEAD(shrink_list); +static DEFINE_SPINLOCK(shrink_list_lock); + int i915_gem_do_init(struct drm_device *dev, unsigned long start, unsigned long end) { @@ -316,6 +321,45 @@ fail_unlock: return ret; } +static inline gfp_t +i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj) +{ + return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping); +} + +static inline void +i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp) +{ + mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp); +} + +static int +i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj) +{ + int ret; + + ret = i915_gem_object_get_pages(obj); + + /* If we've insufficient memory to map in the pages, attempt + * to make some space by throwing out some old buffers. + */ + if (ret == -ENOMEM) { + struct drm_device *dev = obj->dev; + gfp_t gfp; + + ret = i915_gem_evict_something(dev, obj->size); + if (ret) + return ret; + + gfp = i915_gem_object_get_page_gfp_mask(obj); + i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY); + ret = i915_gem_object_get_pages(obj); + i915_gem_object_set_page_gfp_mask (obj, gfp); + } + + return ret; +} + /** * This is the fallback shmem pread path, which allocates temporary storage * in kernel space to copy_to_user into outside of the struct_mutex, so we @@ -367,8 +411,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_get_pages(obj); - if (ret != 0) + ret = i915_gem_object_get_pages_or_evict(obj); + if (ret) goto fail_unlock; ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, @@ -842,8 +886,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_get_pages(obj); - if (ret != 0) + ret = i915_gem_object_get_pages_or_evict(obj); + if (ret) goto fail_unlock; ret = i915_gem_object_set_to_cpu_domain(obj, 1); @@ -1155,28 +1199,22 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Now bind it into the GTT if needed */ mutex_lock(&dev->struct_mutex); if (!obj_priv->gtt_space) { - ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } - - ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + ret = i915_gem_object_bind_to_gtt(obj, 0); + if (ret) + goto unlock; list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); + + ret = i915_gem_object_set_to_gtt_domain(obj, write); + if (ret) + goto unlock; } /* Need a new fence register? */ if (obj_priv->tiling_mode != I915_TILING_NONE) { ret = i915_gem_object_get_fence_reg(obj); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + if (ret) + goto unlock; } pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + @@ -1184,18 +1222,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); - +unlock: mutex_unlock(&dev->struct_mutex); switch (ret) { + case 0: + case -ERESTARTSYS: + return VM_FAULT_NOPAGE; case -ENOMEM: case -EAGAIN: return VM_FAULT_OOM; - case -EFAULT: - case -EINVAL: - return VM_FAULT_SIGBUS; default: - return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } } @@ -1388,6 +1426,14 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, obj_priv = obj->driver_private; + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to mmap a purgeable buffer\n"); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return -EINVAL; + } + + if (!obj_priv->mmap_offset) { ret = i915_gem_create_mmap_offset(obj); if (ret) { @@ -1399,22 +1445,12 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, args->offset = obj_priv->mmap_offset; - obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj); - - /* Make sure the alignment is correct for fence regs etc */ - if (obj_priv->agp_mem && - (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) { - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - /* * Pull it into the GTT so that we have a page list (makes the * initial fault faster and any subsequent flushing possible). */ if (!obj_priv->agp_mem) { - ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); + ret = i915_gem_object_bind_to_gtt(obj, 0); if (ret) { drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); @@ -1437,6 +1473,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) int i; BUG_ON(obj_priv->pages_refcount == 0); + BUG_ON(obj_priv->madv == __I915_MADV_PURGED); if (--obj_priv->pages_refcount != 0) return; @@ -1444,13 +1481,21 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (obj_priv->tiling_mode != I915_TILING_NONE) i915_gem_object_save_bit_17_swizzle(obj); - for (i = 0; i < page_count; i++) - if (obj_priv->pages[i] != NULL) { - if (obj_priv->dirty) - set_page_dirty(obj_priv->pages[i]); + if (obj_priv->madv == I915_MADV_DONTNEED) + obj_priv->dirty = 0; + + for (i = 0; i < page_count; i++) { + if (obj_priv->pages[i] == NULL) + break; + + if (obj_priv->dirty) + set_page_dirty(obj_priv->pages[i]); + + if (obj_priv->madv == I915_MADV_WILLNEED) mark_page_accessed(obj_priv->pages[i]); - page_cache_release(obj_priv->pages[i]); - } + + page_cache_release(obj_priv->pages[i]); + } obj_priv->dirty = 0; drm_free_large(obj_priv->pages); @@ -1489,6 +1534,26 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj) obj_priv->last_rendering_seqno = 0; } +/* Immediately discard the backing storage */ +static void +i915_gem_object_truncate(struct drm_gem_object *obj) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + struct inode *inode; + + inode = obj->filp->f_path.dentry->d_inode; + if (inode->i_op->truncate) + inode->i_op->truncate (inode); + + obj_priv->madv = __I915_MADV_PURGED; +} + +static inline int +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) +{ + return obj_priv->madv == I915_MADV_DONTNEED; +} + static void i915_gem_object_move_to_inactive(struct drm_gem_object *obj) { @@ -1577,15 +1642,24 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, if ((obj->write_domain & flush_domains) == obj->write_domain) { + uint32_t old_write_domain = obj->write_domain; + obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } } } - if (was_empty && !dev_priv->mm.suspended) - queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + if (!dev_priv->mm.suspended) { + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); + if (was_empty) + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + } return seqno; } @@ -1623,6 +1697,8 @@ i915_gem_retire_request(struct drm_device *dev, { drm_i915_private_t *dev_priv = dev->dev_private; + trace_i915_gem_request_retire(dev, request->seqno); + /* Move any buffers on the active list that are no longer referenced * by the ringbuffer to the flushing/inactive lists as appropriate. */ @@ -1671,7 +1747,7 @@ out: /** * Returns true if seq1 is later than seq2. */ -static int +bool i915_seqno_passed(uint32_t seq1, uint32_t seq2) { return (int32_t)(seq1 - seq2) >= 0; @@ -1709,7 +1785,7 @@ i915_gem_retire_requests(struct drm_device *dev) retiring_seqno = request->seqno; if (i915_seqno_passed(seqno, retiring_seqno) || - dev_priv->mm.wedged) { + atomic_read(&dev_priv->mm.wedged)) { i915_gem_retire_request(dev, request); list_del(&request->list); @@ -1751,6 +1827,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) BUG_ON(seqno == 0); + if (atomic_read(&dev_priv->mm.wedged)) + return -EIO; + if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { if (IS_IGDNG(dev)) ier = I915_READ(DEIER) | I915_READ(GTIER); @@ -1763,16 +1842,20 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) i915_driver_irq_postinstall(dev); } + trace_i915_gem_request_wait_begin(dev, seqno); + dev_priv->mm.waiting_gem_seqno = seqno; i915_user_irq_get(dev); ret = wait_event_interruptible(dev_priv->irq_queue, i915_seqno_passed(i915_get_gem_seqno(dev), seqno) || - dev_priv->mm.wedged); + atomic_read(&dev_priv->mm.wedged)); i915_user_irq_put(dev); dev_priv->mm.waiting_gem_seqno = 0; + + trace_i915_gem_request_wait_end(dev, seqno); } - if (dev_priv->mm.wedged) + if (atomic_read(&dev_priv->mm.wedged)) ret = -EIO; if (ret && ret != -ERESTARTSYS) @@ -1803,6 +1886,8 @@ i915_gem_flush(struct drm_device *dev, DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, invalidate_domains, flush_domains); #endif + trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno, + invalidate_domains, flush_domains); if (flush_domains & I915_GEM_DOMAIN_CPU) drm_agp_chipset_flush(dev); @@ -1915,6 +2000,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return -EINVAL; } + /* blow away mappings if mapped through GTT */ + i915_gem_release_mmap(obj); + + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) + i915_gem_clear_fence_reg(obj); + /* Move the object to the CPU domain to ensure that * any possible CPU writes while it's not in the GTT * are flushed when we go to remap it. This will @@ -1928,21 +2019,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return ret; } + BUG_ON(obj_priv->active); + if (obj_priv->agp_mem != NULL) { drm_unbind_agp(obj_priv->agp_mem); drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); obj_priv->agp_mem = NULL; } - BUG_ON(obj_priv->active); - - /* blow away mappings if mapped through GTT */ - i915_gem_release_mmap(obj); - - if (obj_priv->fence_reg != I915_FENCE_REG_NONE) - i915_gem_clear_fence_reg(obj); - i915_gem_object_put_pages(obj); + BUG_ON(obj_priv->pages_refcount); if (obj_priv->gtt_space) { atomic_dec(&dev->gtt_count); @@ -1956,40 +2042,113 @@ i915_gem_object_unbind(struct drm_gem_object *obj) if (!list_empty(&obj_priv->list)) list_del_init(&obj_priv->list); + if (i915_gem_object_is_purgeable(obj_priv)) + i915_gem_object_truncate(obj); + + trace_i915_gem_object_unbind(obj); + return 0; } +static struct drm_gem_object * +i915_gem_find_inactive_object(struct drm_device *dev, int min_size) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *best = NULL; + struct drm_gem_object *first = NULL; + + /* Try to find the smallest clean object */ + list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { + struct drm_gem_object *obj = obj_priv->obj; + if (obj->size >= min_size) { + if ((!obj_priv->dirty || + i915_gem_object_is_purgeable(obj_priv)) && + (!best || obj->size < best->size)) { + best = obj; + if (best->size == min_size) + return best; + } + if (!first) + first = obj; + } + } + + return best ? best : first; +} + static int -i915_gem_evict_something(struct drm_device *dev) +i915_gem_evict_everything(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t seqno; + int ret; + bool lists_empty; + + spin_lock(&dev_priv->mm.active_list_lock); + lists_empty = (list_empty(&dev_priv->mm.inactive_list) && + list_empty(&dev_priv->mm.flushing_list) && + list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + + if (lists_empty) + return -ENOSPC; + + /* Flush everything (on to the inactive lists) and evict */ + i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); + if (seqno == 0) + return -ENOMEM; + + ret = i915_wait_request(dev, seqno); + if (ret) + return ret; + + ret = i915_gem_evict_from_inactive_list(dev); + if (ret) + return ret; + + spin_lock(&dev_priv->mm.active_list_lock); + lists_empty = (list_empty(&dev_priv->mm.inactive_list) && + list_empty(&dev_priv->mm.flushing_list) && + list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + BUG_ON(!lists_empty); + + return 0; +} + +static int +i915_gem_evict_something(struct drm_device *dev, int min_size) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_gem_object *obj; - struct drm_i915_gem_object *obj_priv; - int ret = 0; + int ret; for (;;) { + i915_gem_retire_requests(dev); + /* If there's an inactive buffer available now, grab it * and be done. */ - if (!list_empty(&dev_priv->mm.inactive_list)) { - obj_priv = list_first_entry(&dev_priv->mm.inactive_list, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; - BUG_ON(obj_priv->pin_count != 0); + obj = i915_gem_find_inactive_object(dev, min_size); + if (obj) { + struct drm_i915_gem_object *obj_priv; + #if WATCH_LRU DRM_INFO("%s: evicting %p\n", __func__, obj); #endif + obj_priv = obj->driver_private; + BUG_ON(obj_priv->pin_count != 0); BUG_ON(obj_priv->active); /* Wait on the rendering and unbind the buffer. */ - ret = i915_gem_object_unbind(obj); - break; + return i915_gem_object_unbind(obj); } /* If we didn't get anything, but the ring is still processing - * things, wait for one of those things to finish and hopefully - * leave us a buffer to evict. + * things, wait for the next to finish and hopefully leave us + * a buffer to evict. */ if (!list_empty(&dev_priv->mm.request_list)) { struct drm_i915_gem_request *request; @@ -2000,16 +2159,9 @@ i915_gem_evict_something(struct drm_device *dev) ret = i915_wait_request(dev, request->seqno); if (ret) - break; + return ret; - /* if waiting caused an object to become inactive, - * then loop around and wait for it. Otherwise, we - * assume that waiting freed and unbound something, - * so there should now be some space in the GTT - */ - if (!list_empty(&dev_priv->mm.inactive_list)) - continue; - break; + continue; } /* If we didn't have anything on the request list but there @@ -2018,46 +2170,44 @@ i915_gem_evict_something(struct drm_device *dev) * will get moved to inactive. */ if (!list_empty(&dev_priv->mm.flushing_list)) { - obj_priv = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; + struct drm_i915_gem_object *obj_priv; - i915_gem_flush(dev, - obj->write_domain, - obj->write_domain); - i915_add_request(dev, NULL, obj->write_domain); + /* Find an object that we can immediately reuse */ + list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) { + obj = obj_priv->obj; + if (obj->size >= min_size) + break; - obj = NULL; - continue; - } + obj = NULL; + } - DRM_ERROR("inactive empty %d request empty %d " - "flushing empty %d\n", - list_empty(&dev_priv->mm.inactive_list), - list_empty(&dev_priv->mm.request_list), - list_empty(&dev_priv->mm.flushing_list)); - /* If we didn't do any of the above, there's nothing to be done - * and we just can't fit it in. - */ - return -ENOSPC; - } - return ret; -} + if (obj != NULL) { + uint32_t seqno; -static int -i915_gem_evict_everything(struct drm_device *dev) -{ - int ret; + i915_gem_flush(dev, + obj->write_domain, + obj->write_domain); + seqno = i915_add_request(dev, NULL, obj->write_domain); + if (seqno == 0) + return -ENOMEM; - for (;;) { - ret = i915_gem_evict_something(dev); - if (ret != 0) - break; + ret = i915_wait_request(dev, seqno); + if (ret) + return ret; + + continue; + } + } + + /* If we didn't do any of the above, there's no single buffer + * large enough to swap out for the new one, so just evict + * everything and start again. (This should be rare.) + */ + if (!list_empty (&dev_priv->mm.inactive_list)) + return i915_gem_evict_from_inactive_list(dev); + else + return i915_gem_evict_everything(dev); } - if (ret == -ENOSPC) - return 0; - return ret; } int @@ -2080,7 +2230,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) BUG_ON(obj_priv->pages != NULL); obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); if (obj_priv->pages == NULL) { - DRM_ERROR("Faled to allocate page list\n"); obj_priv->pages_refcount--; return -ENOMEM; } @@ -2091,7 +2240,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) page = read_mapping_page(mapping, i, NULL); if (IS_ERR(page)) { ret = PTR_ERR(page); - DRM_ERROR("read_mapping_page failed: %d\n", ret); i915_gem_object_put_pages(obj); return ret; } @@ -2328,6 +2476,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) else i830_write_fence_reg(reg); + trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode); + return 0; } @@ -2410,10 +2560,17 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_mm_node *free_space; - int page_count, ret; + bool retry_alloc = false; + int ret; if (dev_priv->mm.suspended) return -EBUSY; + + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to bind a purgeable object\n"); + return -EINVAL; + } + if (alignment == 0) alignment = i915_gem_get_gtt_alignment(obj); if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { @@ -2433,30 +2590,16 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) } } if (obj_priv->gtt_space == NULL) { - bool lists_empty; - /* If the gtt is empty and we're still having trouble * fitting our object in, we're out of memory. */ #if WATCH_LRU DRM_INFO("%s: GTT full, evicting something\n", __func__); #endif - spin_lock(&dev_priv->mm.active_list_lock); - lists_empty = (list_empty(&dev_priv->mm.inactive_list) && - list_empty(&dev_priv->mm.flushing_list) && - list_empty(&dev_priv->mm.active_list)); - spin_unlock(&dev_priv->mm.active_list_lock); - if (lists_empty) { - DRM_ERROR("GTT full, but LRU list empty\n"); - return -ENOSPC; - } - - ret = i915_gem_evict_something(dev); - if (ret != 0) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to evict a buffer %d\n", ret); + ret = i915_gem_evict_something(dev, obj->size); + if (ret) return ret; - } + goto search_free; } @@ -2464,27 +2607,56 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) DRM_INFO("Binding object of size %zd at 0x%08x\n", obj->size, obj_priv->gtt_offset); #endif + if (retry_alloc) { + i915_gem_object_set_page_gfp_mask (obj, + i915_gem_object_get_page_gfp_mask (obj) & ~__GFP_NORETRY); + } ret = i915_gem_object_get_pages(obj); + if (retry_alloc) { + i915_gem_object_set_page_gfp_mask (obj, + i915_gem_object_get_page_gfp_mask (obj) | __GFP_NORETRY); + } if (ret) { drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; + + if (ret == -ENOMEM) { + /* first try to clear up some space from the GTT */ + ret = i915_gem_evict_something(dev, obj->size); + if (ret) { + /* now try to shrink everyone else */ + if (! retry_alloc) { + retry_alloc = true; + goto search_free; + } + + return ret; + } + + goto search_free; + } + return ret; } - page_count = obj->size / PAGE_SIZE; /* Create an AGP memory structure pointing at our pages, and bind it * into the GTT. */ obj_priv->agp_mem = drm_agp_bind_pages(dev, obj_priv->pages, - page_count, + obj->size >> PAGE_SHIFT, obj_priv->gtt_offset, obj_priv->agp_type); if (obj_priv->agp_mem == NULL) { i915_gem_object_put_pages(obj); drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; - return -ENOMEM; + + ret = i915_gem_evict_something(dev, obj->size); + if (ret) + return ret; + + goto search_free; } atomic_inc(&dev->gtt_count); atomic_add(obj->size, &dev->gtt_memory); @@ -2496,6 +2668,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); + trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); + return 0; } @@ -2511,15 +2685,7 @@ i915_gem_clflush_object(struct drm_gem_object *obj) if (obj_priv->pages == NULL) return; - /* XXX: The 865 in particular appears to be weird in how it handles - * cache flushing. We haven't figured it out, but the - * clflush+agp_chipset_flush doesn't appear to successfully get the - * data visible to the PGU, while wbinvd + agp_chipset_flush does. - */ - if (IS_I865G(obj->dev)) { - wbinvd(); - return; - } + trace_i915_gem_object_clflush(obj); drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } @@ -2530,21 +2696,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; uint32_t seqno; + uint32_t old_write_domain; if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) return; /* Queue the GPU write cache flushing we need. */ + old_write_domain = obj->write_domain; i915_gem_flush(dev, 0, obj->write_domain); seqno = i915_add_request(dev, NULL, obj->write_domain); obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) { + uint32_t old_write_domain; + if (obj->write_domain != I915_GEM_DOMAIN_GTT) return; @@ -2552,7 +2726,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) * to it immediately go to main memory as far as we know, so there's * no chipset flush. It also doesn't land in render cache. */ + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -2560,13 +2739,19 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; + uint32_t old_write_domain; if (obj->write_domain != I915_GEM_DOMAIN_CPU) return; i915_gem_clflush_object(obj); drm_agp_chipset_flush(dev); + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** @@ -2579,6 +2764,7 @@ int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. */ @@ -2591,6 +2777,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) if (ret != 0) return ret; + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* If we're writing through the GTT domain, then CPU and GPU caches * will need to be invalidated at next use. */ @@ -2609,6 +2798,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) obj_priv->dirty = 1; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2621,6 +2814,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) { + uint32_t old_write_domain, old_read_domains; int ret; i915_gem_object_flush_gpu_write_domain(obj); @@ -2636,6 +2830,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) */ i915_gem_object_set_to_full_cpu_read_domain(obj); + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* Flush the CPU cache if it's still invalid. */ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); @@ -2656,6 +2853,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) obj->write_domain = I915_GEM_DOMAIN_CPU; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2777,6 +2978,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) struct drm_i915_gem_object *obj_priv = obj->driver_private; uint32_t invalidate_domains = 0; uint32_t flush_domains = 0; + uint32_t old_read_domains; BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); @@ -2823,6 +3025,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) i915_gem_clflush_object(obj); } + old_read_domains = obj->read_domains; + /* The actual obj->write_domain will be updated with * pending_write_domain after we emit the accumulated flush for all * of our domain changes in execbuffers (which clears objects' @@ -2841,6 +3045,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) obj->read_domains, obj->write_domain, dev->invalidate_domains, dev->flush_domains); #endif + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); } /** @@ -2893,6 +3101,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, uint64_t size) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_read_domains; int i, ret; if (offset == 0 && size == obj->size) @@ -2939,8 +3148,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, */ BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + old_read_domains = obj->read_domains; obj->read_domains |= I915_GEM_DOMAIN_CPU; + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); + return 0; } @@ -2984,6 +3198,21 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, } target_obj_priv = target_obj->driver_private; +#if WATCH_RELOC + DRM_INFO("%s: obj %p offset %08x target %d " + "read %08x write %08x gtt %08x " + "presumed %08x delta %08x\n", + __func__, + obj, + (int) reloc->offset, + (int) reloc->target_handle, + (int) reloc->read_domains, + (int) reloc->write_domain, + (int) target_obj_priv->gtt_offset, + (int) reloc->presumed_offset, + reloc->delta); +#endif + /* The target buffer should have appeared before us in the * exec_object list, so it should have a GTT space bound by now. */ @@ -2995,25 +3224,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, return -EINVAL; } - if (reloc->offset > obj->size - 4) { - DRM_ERROR("Relocation beyond object bounds: " - "obj %p target %d offset %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->offset, (int) obj->size); - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -EINVAL; - } - if (reloc->offset & 3) { - DRM_ERROR("Relocation not 4-byte aligned: " - "obj %p target %d offset %d.\n", - obj, reloc->target_handle, - (int) reloc->offset); - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -EINVAL; - } - + /* Validate that the target is in a valid r/w GPU domain */ if (reloc->write_domain & I915_GEM_DOMAIN_CPU || reloc->read_domains & I915_GEM_DOMAIN_CPU) { DRM_ERROR("reloc with read/write CPU domains: " @@ -3027,7 +3238,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc->write_domain && target_obj->pending_write_domain && reloc->write_domain != target_obj->pending_write_domain) { DRM_ERROR("Write domain conflict: " @@ -3042,21 +3252,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, return -EINVAL; } -#if WATCH_RELOC - DRM_INFO("%s: obj %p offset %08x target %d " - "read %08x write %08x gtt %08x " - "presumed %08x delta %08x\n", - __func__, - obj, - (int) reloc->offset, - (int) reloc->target_handle, - (int) reloc->read_domains, - (int) reloc->write_domain, - (int) target_obj_priv->gtt_offset, - (int) reloc->presumed_offset, - reloc->delta); -#endif - target_obj->pending_read_domains |= reloc->read_domains; target_obj->pending_write_domain |= reloc->write_domain; @@ -3068,6 +3263,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, continue; } + /* Check that the relocation address is valid... */ + if (reloc->offset > obj->size - 4) { + DRM_ERROR("Relocation beyond object bounds: " + "obj %p target %d offset %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->offset, (int) obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + if (reloc->offset & 3) { + DRM_ERROR("Relocation not 4-byte aligned: " + "obj %p target %d offset %d.\n", + obj, reloc->target_handle, + (int) reloc->offset); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + + /* and points to somewhere within the target object. */ + if (reloc->delta >= target_obj->size) { + DRM_ERROR("Relocation beyond target object bounds: " + "obj %p target %d delta %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->delta, (int) target_obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + ret = i915_gem_object_set_to_gtt_domain(obj, 1); if (ret != 0) { drm_gem_object_unreference(target_obj); @@ -3126,6 +3352,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev, exec_start = (uint32_t) exec_offset + exec->batch_start_offset; exec_len = (uint32_t) exec->batch_len; + trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno); + count = nbox ? nbox : 1; for (i = 0; i < count; i++) { @@ -3363,7 +3591,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, i915_verify_inactive(dev, __FILE__, __LINE__); - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Execbuf while wedged\n"); mutex_unlock(&dev->struct_mutex); ret = -EIO; @@ -3421,8 +3649,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* error other than GTT full, or we've already tried again */ if (ret != -ENOSPC || pin_tries >= 1) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to pin buffers %d\n", ret); + if (ret != -ERESTARTSYS) { + unsigned long long total_size = 0; + for (i = 0; i < args->buffer_count; i++) + total_size += object_list[i]->size; + DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n", + pinned+1, args->buffer_count, + total_size, ret); + DRM_ERROR("%d objects [%d pinned], " + "%d object bytes [%d pinned], " + "%d/%d gtt bytes\n", + atomic_read(&dev->object_count), + atomic_read(&dev->pin_count), + atomic_read(&dev->object_memory), + atomic_read(&dev->pin_memory), + atomic_read(&dev->gtt_memory), + dev->gtt_total); + } goto err; } @@ -3433,7 +3676,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* evict everyone we can from the aperture */ ret = i915_gem_evict_everything(dev); - if (ret) + if (ret && ret != -ENOSPC) goto err; } @@ -3489,8 +3732,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; + uint32_t old_write_domain = obj->write_domain; obj->write_domain = obj->pending_write_domain; + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } i915_verify_inactive(dev, __FILE__, __LINE__); @@ -3607,11 +3854,8 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) i915_verify_inactive(dev, __FILE__, __LINE__); if (obj_priv->gtt_space == NULL) { ret = i915_gem_object_bind_to_gtt(obj, alignment); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to bind: %d\n", ret); + if (ret) return ret; - } } /* * Pre-965 chips need a fence register set up in order to @@ -3691,6 +3935,13 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, } obj_priv = obj->driver_private; + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to pin a purgeable buffer\n"); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return -EINVAL; + } + if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", args->handle); @@ -3803,6 +4054,56 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, return i915_gem_ring_throttle(dev, file_priv); } +int +i915_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_madvise *args = data; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + + switch (args->madv) { + case I915_MADV_DONTNEED: + case I915_MADV_WILLNEED: + break; + default: + return -EINVAL; + } + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) { + DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n", + args->handle); + return -EBADF; + } + + mutex_lock(&dev->struct_mutex); + obj_priv = obj->driver_private; + + if (obj_priv->pin_count) { + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + + DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n"); + return -EINVAL; + } + + if (obj_priv->madv != __I915_MADV_PURGED) + obj_priv->madv = args->madv; + + /* if the object is no longer bound, discard its backing storage */ + if (i915_gem_object_is_purgeable(obj_priv) && + obj_priv->gtt_space == NULL) + i915_gem_object_truncate(obj); + + args->retained = obj_priv->madv != __I915_MADV_PURGED; + + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + + return 0; +} + int i915_gem_init_object(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv; @@ -3827,6 +4128,9 @@ int i915_gem_init_object(struct drm_gem_object *obj) obj_priv->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj_priv->list); INIT_LIST_HEAD(&obj_priv->fence_list); + obj_priv->madv = I915_MADV_WILLNEED; + + trace_i915_gem_object_create(obj); return 0; } @@ -3836,6 +4140,8 @@ void i915_gem_free_object(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; + trace_i915_gem_object_destroy(obj); + while (obj_priv->pin_count > 0) i915_gem_object_unpin(obj); @@ -3844,43 +4150,35 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_object_unbind(obj); - i915_gem_free_mmap_offset(obj); + if (obj_priv->mmap_offset) + i915_gem_free_mmap_offset(obj); kfree(obj_priv->page_cpu_valid); kfree(obj_priv->bit_17); kfree(obj->driver_private); } -/** Unbinds all objects that are on the given buffer list. */ +/** Unbinds all inactive objects. */ static int -i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head) +i915_gem_evict_from_inactive_list(struct drm_device *dev) { - struct drm_gem_object *obj; - struct drm_i915_gem_object *obj_priv; - int ret; + drm_i915_private_t *dev_priv = dev->dev_private; - while (!list_empty(head)) { - obj_priv = list_first_entry(head, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; + while (!list_empty(&dev_priv->mm.inactive_list)) { + struct drm_gem_object *obj; + int ret; - if (obj_priv->pin_count != 0) { - DRM_ERROR("Pinned object in unbind list\n"); - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } + obj = list_first_entry(&dev_priv->mm.inactive_list, + struct drm_i915_gem_object, + list)->obj; ret = i915_gem_object_unbind(obj); if (ret != 0) { - DRM_ERROR("Error unbinding object in LeaveVT: %d\n", - ret); - mutex_unlock(&dev->struct_mutex); + DRM_ERROR("Error unbinding object: %d\n", ret); return ret; } } - return 0; } @@ -3902,6 +4200,7 @@ i915_gem_idle(struct drm_device *dev) * We need to replace this with a semaphore, or something. */ dev_priv->mm.suspended = 1; + del_timer(&dev_priv->hangcheck_timer); /* Cancel the retire work handler, wait for it to finish if running */ @@ -3931,7 +4230,7 @@ i915_gem_idle(struct drm_device *dev) if (last_seqno == cur_seqno) { if (stuck++ > 100) { DRM_ERROR("hardware wedged\n"); - dev_priv->mm.wedged = 1; + atomic_set(&dev_priv->mm.wedged, 1); DRM_WAKEUP(&dev_priv->irq_queue); break; } @@ -3944,7 +4243,7 @@ i915_gem_idle(struct drm_device *dev) i915_gem_retire_requests(dev); spin_lock(&dev_priv->mm.active_list_lock); - if (!dev_priv->mm.wedged) { + if (!atomic_read(&dev_priv->mm.wedged)) { /* Active and flushing should now be empty as we've * waited for a sequence higher than any pending execbuffer */ @@ -3962,29 +4261,41 @@ i915_gem_idle(struct drm_device *dev) * the GPU domains and just stuff them onto inactive. */ while (!list_empty(&dev_priv->mm.active_list)) { - struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *obj; + uint32_t old_write_domain; - obj_priv = list_first_entry(&dev_priv->mm.active_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + obj = list_first_entry(&dev_priv->mm.active_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } spin_unlock(&dev_priv->mm.active_list_lock); while (!list_empty(&dev_priv->mm.flushing_list)) { - struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *obj; + uint32_t old_write_domain; - obj_priv = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + obj = list_first_entry(&dev_priv->mm.flushing_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /* Move all inactive buffers out of the GTT. */ - ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); + ret = i915_gem_evict_from_inactive_list(dev); WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); if (ret) { mutex_unlock(&dev->struct_mutex); @@ -4206,9 +4517,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); - dev_priv->mm.wedged = 0; + atomic_set(&dev_priv->mm.wedged, 0); } mutex_lock(&dev->struct_mutex); @@ -4274,6 +4585,10 @@ i915_gem_load(struct drm_device *dev) i915_gem_retire_work_handler); dev_priv->mm.next_gem_seqno = 1; + spin_lock(&shrink_list_lock); + list_add(&dev_priv->mm.shrink_list, &shrink_list); + spin_unlock(&shrink_list_lock); + /* Old X drivers will take 0-2 for front, back, depth buffers */ dev_priv->fence_reg_start = 3; @@ -4491,3 +4806,116 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) list_del_init(i915_file_priv->mm.request_list.next); mutex_unlock(&dev->struct_mutex); } + +static int +i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) +{ + drm_i915_private_t *dev_priv, *next_dev; + struct drm_i915_gem_object *obj_priv, *next_obj; + int cnt = 0; + int would_deadlock = 1; + + /* "fast-path" to count number of available objects */ + if (nr_to_scan == 0) { + spin_lock(&shrink_list_lock); + list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (mutex_trylock(&dev->struct_mutex)) { + list_for_each_entry(obj_priv, + &dev_priv->mm.inactive_list, + list) + cnt++; + mutex_unlock(&dev->struct_mutex); + } + } + spin_unlock(&shrink_list_lock); + + return (cnt / 100) * sysctl_vfs_cache_pressure; + } + + spin_lock(&shrink_list_lock); + + /* first scan for clean buffers */ + list_for_each_entry_safe(dev_priv, next_dev, + &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (! mutex_trylock(&dev->struct_mutex)) + continue; + + spin_unlock(&shrink_list_lock); + + i915_gem_retire_requests(dev); + + list_for_each_entry_safe(obj_priv, next_obj, + &dev_priv->mm.inactive_list, + list) { + if (i915_gem_object_is_purgeable(obj_priv)) { + i915_gem_object_unbind(obj_priv->obj); + if (--nr_to_scan <= 0) + break; + } + } + + spin_lock(&shrink_list_lock); + mutex_unlock(&dev->struct_mutex); + + would_deadlock = 0; + + if (nr_to_scan <= 0) + break; + } + + /* second pass, evict/count anything still on the inactive list */ + list_for_each_entry_safe(dev_priv, next_dev, + &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (! mutex_trylock(&dev->struct_mutex)) + continue; + + spin_unlock(&shrink_list_lock); + + list_for_each_entry_safe(obj_priv, next_obj, + &dev_priv->mm.inactive_list, + list) { + if (nr_to_scan > 0) { + i915_gem_object_unbind(obj_priv->obj); + nr_to_scan--; + } else + cnt++; + } + + spin_lock(&shrink_list_lock); + mutex_unlock(&dev->struct_mutex); + + would_deadlock = 0; + } + + spin_unlock(&shrink_list_lock); + + if (would_deadlock) + return -1; + else if (cnt > 0) + return (cnt / 100) * sysctl_vfs_cache_pressure; + else + return 0; +} + +static struct shrinker shrinker = { + .shrink = i915_gem_shrink, + .seeks = DEFAULT_SEEKS, +}; + +__init void +i915_gem_shrinker_init(void) +{ + register_shrinker(&shrinker); +} + +__exit void +i915_gem_shrinker_exit(void) +{ + unregister_shrinker(&shrinker); +} diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6c89f2ff249..4dfeec7cdd4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -31,6 +31,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #define MAX_NOPID ((u32)~0) @@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) } if (gt_iir & GT_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); } @@ -302,12 +305,25 @@ static void i915_error_work_func(struct work_struct *work) drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, error_work); struct drm_device *dev = dev_priv->dev; - char *event_string = "ERROR=1"; - char *envp[] = { event_string, NULL }; + char *error_event[] = { "ERROR=1", NULL }; + char *reset_event[] = { "RESET=1", NULL }; + char *reset_done_event[] = { "ERROR=0", NULL }; DRM_DEBUG("generating error event\n"); - - kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); + + if (atomic_read(&dev_priv->mm.wedged)) { + if (IS_I965G(dev)) { + DRM_DEBUG("resetting chip\n"); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); + if (!i965_reset(dev, GDRST_RENDER)) { + atomic_set(&dev_priv->mm.wedged, 0); + kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); + } + } else { + printk("reboot required\n"); + } + } } /** @@ -372,7 +388,7 @@ out: * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -static void i915_handle_error(struct drm_device *dev) +static void i915_handle_error(struct drm_device *dev, bool wedged) { struct drm_i915_private *dev_priv = dev->dev_private; u32 eir = I915_READ(EIR); @@ -482,6 +498,16 @@ static void i915_handle_error(struct drm_device *dev) I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); } + if (wedged) { + atomic_set(&dev_priv->mm.wedged, 1); + + /* + * Wakeup waiting processes so they don't hang + */ + printk("i915: Waking up sleeping processes\n"); + DRM_WAKEUP(&dev_priv->irq_queue); + } + queue_work(dev_priv->wq, &dev_priv->error_work); } @@ -527,7 +553,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) pipeb_stats = I915_READ(PIPEBSTAT); if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - i915_handle_error(dev); + i915_handle_error(dev, false); /* * Clear the PIPE(A|B)STAT regs before the IIR @@ -599,8 +625,12 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) } if (iir & I915_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); + dev_priv->hangcheck_count = 0; + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); } if (pipea_stats & vblank_status) { @@ -880,6 +910,52 @@ int i915_vblank_swap(struct drm_device *dev, void *data, return -EINVAL; } +struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) { + drm_i915_private_t *dev_priv = dev->dev_private; + return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list); +} + +/** + * This is called when the chip hasn't reported back with completed + * batchbuffers in a long time. The first time this is called we simply record + * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses + * again, we assume the chip is wedged and try to fix it. + */ +void i915_hangcheck_elapsed(unsigned long data) +{ + struct drm_device *dev = (struct drm_device *)data; + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t acthd; + + if (!IS_I965G(dev)) + acthd = I915_READ(ACTHD); + else + acthd = I915_READ(ACTHD_I965); + + /* If all work is done then ACTHD clearly hasn't advanced. */ + if (list_empty(&dev_priv->mm.request_list) || + i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) { + dev_priv->hangcheck_count = 0; + return; + } + + if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) { + DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); + i915_handle_error(dev, true); + return; + } + + /* Reset timer case chip hangs without another request being added */ + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); + + if (acthd != dev_priv->last_acthd) + dev_priv->hangcheck_count = 0; + else + dev_priv->hangcheck_count++; + + dev_priv->last_acthd = acthd; +} + /* drm_dma.h hooks */ static void igdng_irq_preinstall(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_opregion.c b/drivers/gpu/drm/i915/i915_opregion.c index e4b4e8898e3..2d5193556d3 100644 --- a/drivers/gpu/drm/i915/i915_opregion.c +++ b/drivers/gpu/drm/i915/i915_opregion.c @@ -148,6 +148,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) struct drm_i915_private *dev_priv = dev->dev_private; struct opregion_asle *asle = dev_priv->opregion.asle; u32 blc_pwm_ctl, blc_pwm_ctl2; + u32 max_backlight, level, shift; if (!(bclp & ASLE_BCLP_VALID)) return ASLE_BACKLIGHT_FAIL; @@ -157,14 +158,25 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp) return ASLE_BACKLIGHT_FAIL; blc_pwm_ctl = I915_READ(BLC_PWM_CTL); - blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK; blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2); - if (blc_pwm_ctl2 & BLM_COMBINATION_MODE) + if (IS_I965G(dev) && (blc_pwm_ctl2 & BLM_COMBINATION_MODE)) pci_write_config_dword(dev->pdev, PCI_LBPC, bclp); - else - I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | ((bclp * 0x101)-1)); - + else { + if (IS_IGD(dev)) { + blc_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1); + max_backlight = (blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> + BACKLIGHT_MODULATION_FREQ_SHIFT; + shift = BACKLIGHT_DUTY_CYCLE_SHIFT + 1; + } else { + blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK; + max_backlight = ((blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> + BACKLIGHT_MODULATION_FREQ_SHIFT) * 2; + shift = BACKLIGHT_DUTY_CYCLE_SHIFT; + } + level = (bclp * max_backlight) / 255; + I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | (level << shift)); + } asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID; return 0; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3f796355346..0466ddbeba3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -86,6 +86,10 @@ #define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0) #define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0) #define LBB 0xf4 +#define GDRST 0xc0 +#define GDRST_FULL (0<<2) +#define GDRST_RENDER (1<<2) +#define GDRST_MEDIA (3<<2) /* VGA stuff */ @@ -344,9 +348,37 @@ #define FBC_CTL_PLANEA (0<<0) #define FBC_CTL_PLANEB (1<<0) #define FBC_FENCE_OFF 0x0321b +#define FBC_TAG 0x03300 #define FBC_LL_SIZE (1536) +/* Framebuffer compression for GM45+ */ +#define DPFC_CB_BASE 0x3200 +#define DPFC_CONTROL 0x3208 +#define DPFC_CTL_EN (1<<31) +#define DPFC_CTL_PLANEA (0<<30) +#define DPFC_CTL_PLANEB (1<<30) +#define DPFC_CTL_FENCE_EN (1<<29) +#define DPFC_SR_EN (1<<10) +#define DPFC_CTL_LIMIT_1X (0<<6) +#define DPFC_CTL_LIMIT_2X (1<<6) +#define DPFC_CTL_LIMIT_4X (2<<6) +#define DPFC_RECOMP_CTL 0x320c +#define DPFC_RECOMP_STALL_EN (1<<27) +#define DPFC_RECOMP_STALL_WM_SHIFT (16) +#define DPFC_RECOMP_STALL_WM_MASK (0x07ff0000) +#define DPFC_RECOMP_TIMER_COUNT_SHIFT (0) +#define DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f) +#define DPFC_STATUS 0x3210 +#define DPFC_INVAL_SEG_SHIFT (16) +#define DPFC_INVAL_SEG_MASK (0x07ff0000) +#define DPFC_COMP_SEG_SHIFT (0) +#define DPFC_COMP_SEG_MASK (0x000003ff) +#define DPFC_STATUS2 0x3214 +#define DPFC_FENCE_YOFF 0x3218 +#define DPFC_CHICKEN 0x3224 +#define DPFC_HT_MODIFY (1<<31) + /* * GPIO regs */ @@ -2000,6 +2032,8 @@ #define PF_ENABLE (1<<31) #define PFA_WIN_SZ 0x68074 #define PFB_WIN_SZ 0x68874 +#define PFA_WIN_POS 0x68070 +#define PFB_WIN_POS 0x68870 /* legacy palette */ #define LGC_PALETTE_A 0x4a000 diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 20d4d19f556..bd6d8d91ca9 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -228,6 +228,7 @@ static void i915_save_modeset_reg(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) return; + /* Pipe & plane A info */ dev_priv->savePIPEACONF = I915_READ(PIPEACONF); dev_priv->savePIPEASRC = I915_READ(PIPEASRC); @@ -285,6 +286,7 @@ static void i915_save_modeset_reg(struct drm_device *dev) dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT); return; } + static void i915_restore_modeset_reg(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -379,19 +381,10 @@ static void i915_restore_modeset_reg(struct drm_device *dev) return; } -int i915_save_state(struct drm_device *dev) + +void i915_save_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); - - /* Render Standby */ - if (IS_I965G(dev) && IS_MOBILE(dev)) - dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); - - /* Hardware status page */ - dev_priv->saveHWS = I915_READ(HWS_PGA); /* Display arbitration control */ dev_priv->saveDSPARB = I915_READ(DSPARB); @@ -399,6 +392,7 @@ int i915_save_state(struct drm_device *dev) /* This is only meaningful in non-KMS mode */ /* Don't save them in KMS mode */ i915_save_modeset_reg(dev); + /* Cursor state */ dev_priv->saveCURACNTR = I915_READ(CURACNTR); dev_priv->saveCURAPOS = I915_READ(CURAPOS); @@ -448,81 +442,22 @@ int i915_save_state(struct drm_device *dev) dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2); dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL); - /* Interrupt state */ - dev_priv->saveIIR = I915_READ(IIR); - dev_priv->saveIER = I915_READ(IER); - dev_priv->saveIMR = I915_READ(IMR); - /* VGA state */ dev_priv->saveVGA0 = I915_READ(VGA0); dev_priv->saveVGA1 = I915_READ(VGA1); dev_priv->saveVGA_PD = I915_READ(VGA_PD); dev_priv->saveVGACNTRL = I915_READ(VGACNTRL); - /* Clock gating state */ - dev_priv->saveD_STATE = I915_READ(D_STATE); - dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); - - /* Cache mode state */ - dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); - - /* Memory Arbitration state */ - dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE); - - /* Scratch space */ - for (i = 0; i < 16; i++) { - dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2)); - dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2)); - } - for (i = 0; i < 3; i++) - dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2)); - - /* Fences */ - if (IS_I965G(dev)) { - for (i = 0; i < 16; i++) - dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8)); - } else { - for (i = 0; i < 8; i++) - dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); - - if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) - for (i = 0; i < 8; i++) - dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4)); - } i915_save_vga(dev); - - return 0; } -int i915_restore_state(struct drm_device *dev) +void i915_restore_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); - - /* Render Standby */ - if (IS_I965G(dev) && IS_MOBILE(dev)) - I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY); - - /* Hardware status page */ - I915_WRITE(HWS_PGA, dev_priv->saveHWS); /* Display arbitration */ I915_WRITE(DSPARB, dev_priv->saveDSPARB); - /* Fences */ - if (IS_I965G(dev)) { - for (i = 0; i < 16; i++) - I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]); - } else { - for (i = 0; i < 8; i++) - I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); - if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) - for (i = 0; i < 8; i++) - I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]); - } - /* Display port ratios (must be done before clock is set) */ if (SUPPORTS_INTEGRATED_DP(dev)) { I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M); @@ -534,9 +469,11 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N); I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N); } + /* This is only meaningful in non-KMS mode */ /* Don't restore them in KMS mode */ i915_restore_modeset_reg(dev); + /* Cursor state */ I915_WRITE(CURAPOS, dev_priv->saveCURAPOS); I915_WRITE(CURACNTR, dev_priv->saveCURACNTR); @@ -586,6 +523,95 @@ int i915_restore_state(struct drm_device *dev) I915_WRITE(VGA_PD, dev_priv->saveVGA_PD); DRM_UDELAY(150); + i915_restore_vga(dev); +} + +int i915_save_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int i; + + pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB); + + /* Render Standby */ + if (IS_I965G(dev) && IS_MOBILE(dev)) + dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY); + + /* Hardware status page */ + dev_priv->saveHWS = I915_READ(HWS_PGA); + + i915_save_display(dev); + + /* Interrupt state */ + dev_priv->saveIER = I915_READ(IER); + dev_priv->saveIMR = I915_READ(IMR); + + /* Clock gating state */ + dev_priv->saveD_STATE = I915_READ(D_STATE); + dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); /* Not sure about this */ + + /* Cache mode state */ + dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0); + + /* Memory Arbitration state */ + dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE); + + /* Scratch space */ + for (i = 0; i < 16; i++) { + dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2)); + dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2)); + } + for (i = 0; i < 3; i++) + dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2)); + + /* Fences */ + if (IS_I965G(dev)) { + for (i = 0; i < 16; i++) + dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8)); + } else { + for (i = 0; i < 8; i++) + dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4)); + + if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) + for (i = 0; i < 8; i++) + dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4)); + } + + return 0; +} + +int i915_restore_state(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int i; + + pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); + + /* Render Standby */ + if (IS_I965G(dev) && IS_MOBILE(dev)) + I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY); + + /* Hardware status page */ + I915_WRITE(HWS_PGA, dev_priv->saveHWS); + + /* Fences */ + if (IS_I965G(dev)) { + for (i = 0; i < 16; i++) + I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]); + } else { + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]); + if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) + for (i = 0; i < 8; i++) + I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]); + } + + i915_restore_display(dev); + + /* Interrupt state */ + I915_WRITE (IER, dev_priv->saveIER); + I915_WRITE (IMR, dev_priv->saveIMR); + /* Clock gating state */ I915_WRITE (D_STATE, dev_priv->saveD_STATE); I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D); @@ -603,8 +629,6 @@ int i915_restore_state(struct drm_device *dev) for (i = 0; i < 3; i++) I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]); - i915_restore_vga(dev); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h new file mode 100644 index 00000000000..5567a40816f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -0,0 +1,315 @@ +#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _I915_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include <drm/drmP.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i915 +#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) +#define TRACE_INCLUDE_FILE i915_trace + +/* object tracking */ + +TRACE_EVENT(i915_gem_object_create, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, size) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->size = obj->size; + ), + + TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) +); + +TRACE_EVENT(i915_gem_object_bind, + + TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset), + + TP_ARGS(obj, gtt_offset), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, gtt_offset) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->gtt_offset = gtt_offset; + ), + + TP_printk("obj=%p, gtt_offset=%08x", + __entry->obj, __entry->gtt_offset) +); + +TRACE_EVENT(i915_gem_object_clflush, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +TRACE_EVENT(i915_gem_object_change_domain, + + TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain), + + TP_ARGS(obj, old_read_domains, old_write_domain), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, read_domains) + __field(u32, write_domain) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->read_domains = obj->read_domains | (old_read_domains << 16); + __entry->write_domain = obj->write_domain | (old_write_domain << 16); + ), + + TP_printk("obj=%p, read=%04x, write=%04x", + __entry->obj, + __entry->read_domains, __entry->write_domain) +); + +TRACE_EVENT(i915_gem_object_get_fence, + + TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode), + + TP_ARGS(obj, fence, tiling_mode), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(int, fence) + __field(int, tiling_mode) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->fence = fence; + __entry->tiling_mode = tiling_mode; + ), + + TP_printk("obj=%p, fence=%d, tiling=%d", + __entry->obj, __entry->fence, __entry->tiling_mode) +); + +TRACE_EVENT(i915_gem_object_unbind, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +TRACE_EVENT(i915_gem_object_destroy, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +/* batch tracing */ + +TRACE_EVENT(i915_gem_request_submit, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_flush, + + TP_PROTO(struct drm_device *dev, u32 seqno, + u32 flush_domains, u32 invalidate_domains), + + TP_ARGS(dev, seqno, flush_domains, invalidate_domains), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + __field(u32, flush_domains) + __field(u32, invalidate_domains) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + __entry->flush_domains = flush_domains; + __entry->invalidate_domains = invalidate_domains; + ), + + TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x", + __entry->dev, __entry->seqno, + __entry->flush_domains, __entry->invalidate_domains) +); + + +TRACE_EVENT(i915_gem_request_complete, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_retire, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_begin, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_end, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_ring_wait_begin, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +TRACE_EVENT(i915_ring_wait_end, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +#endif /* _I915_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915 +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c new file mode 100644 index 00000000000..ead876eb6ea --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace_points.c @@ -0,0 +1,11 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#include "i915_drv.h" + +#define CREATE_TRACE_POINTS +#include "i915_trace.h" diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 1e28c1652fd..4337414846b 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -217,6 +217,9 @@ parse_general_features(struct drm_i915_private *dev_priv, if (IS_I85X(dev_priv->dev)) dev_priv->lvds_ssc_freq = general->ssc_freq ? 66 : 48; + else if (IS_IGDNG(dev_priv->dev)) + dev_priv->lvds_ssc_freq = + general->ssc_freq ? 100 : 120; else dev_priv->lvds_ssc_freq = general->ssc_freq ? 100 : 96; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 88814fa2dfd..212e22740fc 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -179,13 +179,10 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 adpa, temp; + u32 adpa; bool ret; - temp = adpa = I915_READ(PCH_ADPA); - - adpa &= ~ADPA_DAC_ENABLE; - I915_WRITE(PCH_ADPA, adpa); + adpa = I915_READ(PCH_ADPA); adpa &= ~ADPA_CRT_HOTPLUG_MASK; @@ -212,8 +209,6 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector) else ret = false; - /* restore origin register */ - I915_WRITE(PCH_ADPA, temp); return ret; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0227b165290..93ff6c03733 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -24,6 +24,8 @@ * Eric Anholt <eric@anholt.net> */ +#include <linux/module.h> +#include <linux/input.h> #include <linux/i2c.h> #include <linux/kernel.h> #include "drmP.h" @@ -875,7 +877,7 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc, refclk, best_clock); if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { - if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) == + if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP) clock.p2 = limit->p2.p2_fast; else @@ -952,6 +954,241 @@ intel_wait_for_vblank(struct drm_device *dev) mdelay(20); } +/* Parameters have changed, update FBC info */ +static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_framebuffer *fb = crtc->fb; + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int plane, i; + u32 fbc_ctl, fbc_ctl2; + + dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE; + + if (fb->pitch < dev_priv->cfb_pitch) + dev_priv->cfb_pitch = fb->pitch; + + /* FBC_CTL wants 64B units */ + dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; + dev_priv->cfb_fence = obj_priv->fence_reg; + dev_priv->cfb_plane = intel_crtc->plane; + plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB; + + /* Clear old tags */ + for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++) + I915_WRITE(FBC_TAG + (i * 4), 0); + + /* Set it up... */ + fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane; + if (obj_priv->tiling_mode != I915_TILING_NONE) + fbc_ctl2 |= FBC_CTL_CPU_FENCE; + I915_WRITE(FBC_CONTROL2, fbc_ctl2); + I915_WRITE(FBC_FENCE_OFF, crtc->y); + + /* enable it... */ + fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC; + fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; + fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT; + if (obj_priv->tiling_mode != I915_TILING_NONE) + fbc_ctl |= dev_priv->cfb_fence; + I915_WRITE(FBC_CONTROL, fbc_ctl); + + DRM_DEBUG("enabled FBC, pitch %ld, yoff %d, plane %d, ", + dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane); +} + +void i8xx_disable_fbc(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 fbc_ctl; + + if (!I915_HAS_FBC(dev)) + return; + + /* Disable compression */ + fbc_ctl = I915_READ(FBC_CONTROL); + fbc_ctl &= ~FBC_CTL_EN; + I915_WRITE(FBC_CONTROL, fbc_ctl); + + /* Wait for compressing bit to clear */ + while (I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) + ; /* nothing */ + + intel_wait_for_vblank(dev); + + DRM_DEBUG("disabled FBC\n"); +} + +static bool i8xx_fbc_enabled(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + return I915_READ(FBC_CONTROL) & FBC_CTL_EN; +} + +static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_framebuffer *fb = crtc->fb; + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int plane = (intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : + DPFC_CTL_PLANEB); + unsigned long stall_watermark = 200; + u32 dpfc_ctl; + + dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1; + dev_priv->cfb_fence = obj_priv->fence_reg; + dev_priv->cfb_plane = intel_crtc->plane; + + dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X; + if (obj_priv->tiling_mode != I915_TILING_NONE) { + dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence; + I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY); + } else { + I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY); + } + + I915_WRITE(DPFC_CONTROL, dpfc_ctl); + I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN | + (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) | + (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT)); + I915_WRITE(DPFC_FENCE_YOFF, crtc->y); + + /* enable it... */ + I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN); + + DRM_DEBUG("enabled fbc on plane %d\n", intel_crtc->plane); +} + +void g4x_disable_fbc(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 dpfc_ctl; + + /* Disable compression */ + dpfc_ctl = I915_READ(DPFC_CONTROL); + dpfc_ctl &= ~DPFC_CTL_EN; + I915_WRITE(DPFC_CONTROL, dpfc_ctl); + intel_wait_for_vblank(dev); + + DRM_DEBUG("disabled FBC\n"); +} + +static bool g4x_fbc_enabled(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN; +} + +/** + * intel_update_fbc - enable/disable FBC as needed + * @crtc: CRTC to point the compressor at + * @mode: mode in use + * + * Set up the framebuffer compression hardware at mode set time. We + * enable it if possible: + * - plane A only (on pre-965) + * - no pixel mulitply/line duplication + * - no alpha buffer discard + * - no dual wide + * - framebuffer <= 2048 in width, 1536 in height + * + * We can't assume that any compression will take place (worst case), + * so the compressed buffer has to be the same size as the uncompressed + * one. It also must reside (along with the line length buffer) in + * stolen memory. + * + * We need to enable/disable FBC on a global basis. + */ +static void intel_update_fbc(struct drm_crtc *crtc, + struct drm_display_mode *mode) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_framebuffer *fb = crtc->fb; + struct intel_framebuffer *intel_fb; + struct drm_i915_gem_object *obj_priv; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int plane = intel_crtc->plane; + + if (!i915_powersave) + return; + + if (!dev_priv->display.fbc_enabled || + !dev_priv->display.enable_fbc || + !dev_priv->display.disable_fbc) + return; + + if (!crtc->fb) + return; + + intel_fb = to_intel_framebuffer(fb); + obj_priv = intel_fb->obj->driver_private; + + /* + * If FBC is already on, we just have to verify that we can + * keep it that way... + * Need to disable if: + * - changing FBC params (stride, fence, mode) + * - new fb is too large to fit in compressed buffer + * - going to an unsupported config (interlace, pixel multiply, etc.) + */ + if (intel_fb->obj->size > dev_priv->cfb_size) { + DRM_DEBUG("framebuffer too large, disabling compression\n"); + goto out_disable; + } + if ((mode->flags & DRM_MODE_FLAG_INTERLACE) || + (mode->flags & DRM_MODE_FLAG_DBLSCAN)) { + DRM_DEBUG("mode incompatible with compression, disabling\n"); + goto out_disable; + } + if ((mode->hdisplay > 2048) || + (mode->vdisplay > 1536)) { + DRM_DEBUG("mode too large for compression, disabling\n"); + goto out_disable; + } + if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) { + DRM_DEBUG("plane not 0, disabling compression\n"); + goto out_disable; + } + if (obj_priv->tiling_mode != I915_TILING_X) { + DRM_DEBUG("framebuffer not tiled, disabling compression\n"); + goto out_disable; + } + + if (dev_priv->display.fbc_enabled(crtc)) { + /* We can re-enable it in this case, but need to update pitch */ + if (fb->pitch > dev_priv->cfb_pitch) + dev_priv->display.disable_fbc(dev); + if (obj_priv->fence_reg != dev_priv->cfb_fence) + dev_priv->display.disable_fbc(dev); + if (plane != dev_priv->cfb_plane) + dev_priv->display.disable_fbc(dev); + } + + if (!dev_priv->display.fbc_enabled(crtc)) { + /* Now try to turn it back on if possible */ + dev_priv->display.enable_fbc(crtc, 500); + } + + return; + +out_disable: + DRM_DEBUG("unsupported config, disabling FBC\n"); + /* Multiple disables should be harmless */ + if (dev_priv->display.fbc_enabled(crtc)) + dev_priv->display.disable_fbc(dev); +} + static int intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) @@ -964,12 +1201,13 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_i915_gem_object *obj_priv; struct drm_gem_object *obj; int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; unsigned long Start, Offset; - int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR); - int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF); - int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE; - int dsptileoff = (pipe == 0 ? DSPATILEOFF : DSPBTILEOFF); - int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; + int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR); + int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF); + int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE; + int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF); + int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR; u32 dspcntr, alignment; int ret; @@ -979,12 +1217,12 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, return 0; } - switch (pipe) { + switch (plane) { case 0: case 1: break; default: - DRM_ERROR("Can't update pipe %d in SAREA\n", pipe); + DRM_ERROR("Can't update plane %d in SAREA\n", plane); return -EINVAL; } @@ -1086,6 +1324,9 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, I915_READ(dspbase); } + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + intel_wait_for_vblank(dev); if (old_fb) { @@ -1217,6 +1458,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF; int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1; int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ; + int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS; int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B; @@ -1268,6 +1510,19 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode) } } + /* Enable panel fitting for LVDS */ + if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) { + temp = I915_READ(pf_ctl_reg); + I915_WRITE(pf_ctl_reg, temp | PF_ENABLE); + + /* currently full aspect */ + I915_WRITE(pf_win_pos, 0); + + I915_WRITE(pf_win_size, + (dev_priv->panel_fixed_mode->hdisplay << 16) | + (dev_priv->panel_fixed_mode->vdisplay)); + } + /* Enable CPU pipe */ temp = I915_READ(pipeconf_reg); if ((temp & PIPEACONF_ENABLE) == 0) { @@ -1532,9 +1787,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; - int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; - int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR; + int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR; + int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR; int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF; u32 temp; @@ -1577,6 +1833,9 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) intel_crtc_load_lut(crtc); + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + /* Give the overlay scaler a chance to enable if it's on this pipe */ //intel_crtc_dpms_video(crtc, true); TODO intel_update_watermarks(dev); @@ -1586,6 +1845,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) /* Give the overlay scaler a chance to disable if it's on this pipe */ //intel_crtc_dpms_video(crtc, FALSE); TODO + if (dev_priv->cfb_plane == plane && + dev_priv->display.disable_fbc) + dev_priv->display.disable_fbc(dev); + /* Disable the VGA plane that we never use */ i915_disable_vga(dev); @@ -1634,15 +1897,13 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) static void intel_crtc_dpms(struct drm_crtc *crtc, int mode) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_master_private *master_priv; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; bool enabled; - if (IS_IGDNG(dev)) - igdng_crtc_dpms(crtc, mode); - else - i9xx_crtc_dpms(crtc, mode); + dev_priv->display.dpms(crtc, mode); intel_crtc->dpms_mode = mode; @@ -1709,56 +1970,68 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc, return true; } +static int i945_get_display_clock_speed(struct drm_device *dev) +{ + return 400000; +} -/** Returns the core display clock speed for i830 - i945 */ -static int intel_get_core_clock_speed(struct drm_device *dev) +static int i915_get_display_clock_speed(struct drm_device *dev) { + return 333000; +} - /* Core clock values taken from the published datasheets. - * The 830 may go up to 166 Mhz, which we should check. - */ - if (IS_I945G(dev)) - return 400000; - else if (IS_I915G(dev)) - return 333000; - else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev)) - return 200000; - else if (IS_I915GM(dev)) { - u16 gcfgc = 0; +static int i9xx_misc_get_display_clock_speed(struct drm_device *dev) +{ + return 200000; +} - pci_read_config_word(dev->pdev, GCFGC, &gcfgc); +static int i915gm_get_display_clock_speed(struct drm_device *dev) +{ + u16 gcfgc = 0; - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133000; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_MHZ: - return 333000; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; - } - } - } else if (IS_I865G(dev)) - return 266000; - else if (IS_I855(dev)) { - u16 hpllcc = 0; - /* Assume that the hardware is in the high speed state. This - * should be the default. - */ - switch (hpllcc & GC_CLOCK_CONTROL_MASK) { - case GC_CLOCK_133_200: - case GC_CLOCK_100_200: - return 200000; - case GC_CLOCK_166_250: - return 250000; - case GC_CLOCK_100_133: - return 133000; + pci_read_config_word(dev->pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133000; + else { + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_MHZ: + return 333000; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 190000; } - } else /* 852, 830 */ + } +} + +static int i865_get_display_clock_speed(struct drm_device *dev) +{ + return 266000; +} + +static int i855_get_display_clock_speed(struct drm_device *dev) +{ + u16 hpllcc = 0; + /* Assume that the hardware is in the high speed state. This + * should be the default. + */ + switch (hpllcc & GC_CLOCK_CONTROL_MASK) { + case GC_CLOCK_133_200: + case GC_CLOCK_100_200: + return 200000; + case GC_CLOCK_166_250: + return 250000; + case GC_CLOCK_100_133: return 133000; + } + + /* Shouldn't happen */ + return 0; +} - return 0; /* Silence gcc warning */ +static int i830_get_display_clock_speed(struct drm_device *dev) +{ + return 133000; } /** @@ -1921,7 +2194,14 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, { long entries_required, wm_size; - entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000; + /* + * Note: we need to make sure we don't overflow for various clock & + * latency values. + * clocks go from a few thousand to several hundred thousand. + * latency is usually a few thousand + */ + entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) / + 1000; entries_required /= wm->cacheline_size; DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required); @@ -1986,14 +2266,13 @@ static struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, int fsb, for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { latency = &cxsr_latency_table[i]; if (is_desktop == latency->is_desktop && - fsb == latency->fsb_freq && mem == latency->mem_freq) - break; + fsb == latency->fsb_freq && mem == latency->mem_freq) + return latency; } - if (i >= ARRAY_SIZE(cxsr_latency_table)) { - DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n"); - return NULL; - } - return latency; + + DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n"); + + return NULL; } static void igd_disable_cxsr(struct drm_device *dev) @@ -2084,32 +2363,36 @@ static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock, */ const static int latency_ns = 5000; -static int intel_get_fifo_size(struct drm_device *dev, int plane) +static int i9xx_get_fifo_size(struct drm_device *dev, int plane) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t dsparb = I915_READ(DSPARB); int size; - if (IS_I9XX(dev)) { - if (plane == 0) - size = dsparb & 0x7f; - else - size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - - (dsparb & 0x7f); - } else if (IS_I85X(dev)) { - if (plane == 0) - size = dsparb & 0x1ff; - else - size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - - (dsparb & 0x1ff); - size >>= 1; /* Convert to cachelines */ - } else if (IS_845G(dev)) { + if (plane == 0) size = dsparb & 0x7f; - size >>= 2; /* Convert to cachelines */ - } else { - size = dsparb & 0x7f; - size >>= 1; /* Convert to cachelines */ - } + else + size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - + (dsparb & 0x7f); + + DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", + size); + + return size; +} + +static int i85x_get_fifo_size(struct drm_device *dev, int plane) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + int size; + + if (plane == 0) + size = dsparb & 0x1ff; + else + size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - + (dsparb & 0x1ff); + size >>= 1; /* Convert to cachelines */ DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", size); @@ -2117,7 +2400,38 @@ static int intel_get_fifo_size(struct drm_device *dev, int plane) return size; } -static void g4x_update_wm(struct drm_device *dev) +static int i845_get_fifo_size(struct drm_device *dev, int plane) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + int size; + + size = dsparb & 0x7f; + size >>= 2; /* Convert to cachelines */ + + DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", + size); + + return size; +} + +static int i830_get_fifo_size(struct drm_device *dev, int plane) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dsparb = I915_READ(DSPARB); + int size; + + size = dsparb & 0x7f; + size >>= 1; /* Convert to cachelines */ + + DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A", + size); + + return size; +} + +static void g4x_update_wm(struct drm_device *dev, int unused, int unused2, + int unused3, int unused4) { struct drm_i915_private *dev_priv = dev->dev_private; u32 fw_blc_self = I915_READ(FW_BLC_SELF); @@ -2129,7 +2443,8 @@ static void g4x_update_wm(struct drm_device *dev) I915_WRITE(FW_BLC_SELF, fw_blc_self); } -static void i965_update_wm(struct drm_device *dev) +static void i965_update_wm(struct drm_device *dev, int unused, int unused2, + int unused3, int unused4) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2165,8 +2480,8 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock, cacheline_size = planea_params.cacheline_size; /* Update per-plane FIFO sizes */ - planea_params.fifo_size = intel_get_fifo_size(dev, 0); - planeb_params.fifo_size = intel_get_fifo_size(dev, 1); + planea_params.fifo_size = dev_priv->display.get_fifo_size(dev, 0); + planeb_params.fifo_size = dev_priv->display.get_fifo_size(dev, 1); planea_wm = intel_calculate_wm(planea_clock, &planea_params, pixel_size, latency_ns); @@ -2213,14 +2528,14 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock, I915_WRITE(FW_BLC2, fwater_hi); } -static void i830_update_wm(struct drm_device *dev, int planea_clock, - int pixel_size) +static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused, + int unused2, int pixel_size) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff; int planea_wm; - i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0); + i830_wm_info.fifo_size = dev_priv->display.get_fifo_size(dev, 0); planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info, pixel_size, latency_ns); @@ -2264,6 +2579,7 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock, */ static void intel_update_watermarks(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; struct intel_crtc *intel_crtc; int sr_hdisplay = 0; @@ -2302,15 +2618,8 @@ static void intel_update_watermarks(struct drm_device *dev) else if (IS_IGD(dev)) igd_disable_cxsr(dev); - if (IS_G4X(dev)) - g4x_update_wm(dev); - else if (IS_I965G(dev)) - i965_update_wm(dev); - else if (IS_I9XX(dev) || IS_MOBILE(dev)) - i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay, - pixel_size); - else - i830_update_wm(dev, planea_clock, pixel_size); + dev_priv->display.update_wm(dev, planea_clock, planeb_clock, + sr_hdisplay, pixel_size); } static int intel_crtc_mode_set(struct drm_crtc *crtc, @@ -2323,10 +2632,11 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; int fp_reg = (pipe == 0) ? FPA0 : FPB0; int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B; int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD; - int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR; + int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR; int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF; int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; @@ -2334,8 +2644,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B; int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B; int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B; - int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE; - int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS; + int dspsize_reg = (plane == 0) ? DSPASIZE : DSPBSIZE; + int dsppos_reg = (plane == 0) ? DSPAPOS : DSPBPOS; int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC; int refclk, num_outputs = 0; intel_clock_t clock, reduced_clock; @@ -2568,7 +2878,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, enable color space conversion */ if (!IS_IGDNG(dev)) { if (pipe == 0) - dspcntr |= DISPPLANE_SEL_PIPE_A; + dspcntr &= ~DISPPLANE_SEL_PIPE_MASK; else dspcntr |= DISPPLANE_SEL_PIPE_B; } @@ -2580,7 +2890,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, * XXX: No double-wide on 915GM pipe B. Is that the only reason for the * pipe == 0 check? */ - if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10) + if (mode->clock > + dev_priv->display.get_display_clock_speed(dev) * 9 / 10) pipeconf |= PIPEACONF_DOUBLE_WIDE; else pipeconf &= ~PIPEACONF_DOUBLE_WIDE; @@ -2652,9 +2963,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, udelay(150); if (IS_I965G(dev) && !IS_IGDNG(dev)) { - sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; - I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | + if (is_sdvo) { + sdvo_pixel_multiply = adjusted_mode->clock / mode->clock; + I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) | ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT)); + } else + I915_WRITE(dpll_md_reg, 0); } else { /* write it again -- the BIOS does, after all */ I915_WRITE(dpll_reg, dpll); @@ -2734,6 +3048,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, /* Flush the plane changes */ ret = intel_pipe_set_base(crtc, x, y, old_fb); + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + intel_update_watermarks(dev); drm_vblank_post_modeset(dev, pipe); @@ -2778,6 +3095,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, struct drm_gem_object *bo; struct drm_i915_gem_object *obj_priv; int pipe = intel_crtc->pipe; + int plane = intel_crtc->plane; uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR; uint32_t base = (pipe == 0) ? CURABASE : CURBBASE; uint32_t temp = I915_READ(control); @@ -2863,6 +3181,10 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, i915_gem_object_unpin(intel_crtc->cursor_bo); drm_gem_object_unreference(intel_crtc->cursor_bo); } + + if ((IS_I965G(dev) || plane == 0)) + intel_update_fbc(crtc, &crtc->mode); + mutex_unlock(&dev->struct_mutex); intel_crtc->cursor_addr = addr; @@ -3544,6 +3866,14 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->lut_b[i] = i; } + /* Swap pipes & planes for FBC on pre-965 */ + intel_crtc->pipe = pipe; + intel_crtc->plane = pipe; + if (IS_MOBILE(dev) && (IS_I9XX(dev) && !IS_I965G(dev))) { + DRM_DEBUG("swapping pipes & planes for FBC\n"); + intel_crtc->plane = ((pipe == 0) ? 1 : 0); + } + intel_crtc->cursor_addr = 0; intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); @@ -3826,6 +4156,73 @@ void intel_init_clock_gating(struct drm_device *dev) } } +/* Set up chip specific display functions */ +static void intel_init_display(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + /* We always want a DPMS function */ + if (IS_IGDNG(dev)) + dev_priv->display.dpms = igdng_crtc_dpms; + else + dev_priv->display.dpms = i9xx_crtc_dpms; + + /* Only mobile has FBC, leave pointers NULL for other chips */ + if (IS_MOBILE(dev)) { + if (IS_GM45(dev)) { + dev_priv->display.fbc_enabled = g4x_fbc_enabled; + dev_priv->display.enable_fbc = g4x_enable_fbc; + dev_priv->display.disable_fbc = g4x_disable_fbc; + } else if (IS_I965GM(dev) || IS_I945GM(dev) || IS_I915GM(dev)) { + dev_priv->display.fbc_enabled = i8xx_fbc_enabled; + dev_priv->display.enable_fbc = i8xx_enable_fbc; + dev_priv->display.disable_fbc = i8xx_disable_fbc; + } + /* 855GM needs testing */ + } + + /* Returns the core display clock speed */ + if (IS_I945G(dev)) + dev_priv->display.get_display_clock_speed = + i945_get_display_clock_speed; + else if (IS_I915G(dev)) + dev_priv->display.get_display_clock_speed = + i915_get_display_clock_speed; + else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev)) + dev_priv->display.get_display_clock_speed = + i9xx_misc_get_display_clock_speed; + else if (IS_I915GM(dev)) + dev_priv->display.get_display_clock_speed = + i915gm_get_display_clock_speed; + else if (IS_I865G(dev)) + dev_priv->display.get_display_clock_speed = + i865_get_display_clock_speed; + else if (IS_I855(dev)) + dev_priv->display.get_display_clock_speed = + i855_get_display_clock_speed; + else /* 852, 830 */ + dev_priv->display.get_display_clock_speed = + i830_get_display_clock_speed; + + /* For FIFO watermark updates */ + if (IS_G4X(dev)) + dev_priv->display.update_wm = g4x_update_wm; + else if (IS_I965G(dev)) + dev_priv->display.update_wm = i965_update_wm; + else if (IS_I9XX(dev) || IS_MOBILE(dev)) { + dev_priv->display.update_wm = i9xx_update_wm; + dev_priv->display.get_fifo_size = i9xx_get_fifo_size; + } else { + if (IS_I85X(dev)) + dev_priv->display.get_fifo_size = i85x_get_fifo_size; + else if (IS_845G(dev)) + dev_priv->display.get_fifo_size = i845_get_fifo_size; + else + dev_priv->display.get_fifo_size = i830_get_fifo_size; + dev_priv->display.update_wm = i830_update_wm; + } +} + void intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3839,6 +4236,8 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = (void *)&intel_mode_funcs; + intel_init_display(dev); + if (IS_I965G(dev)) { dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; @@ -3904,6 +4303,9 @@ void intel_modeset_cleanup(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); + if (dev_priv->display.disable_fbc) + dev_priv->display.disable_fbc(dev); + drm_mode_config_cleanup(dev); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3ebbbabfe59..8aa4b7f30da 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -28,6 +28,7 @@ #include <linux/i2c.h> #include <linux/i2c-id.h> #include <linux/i2c-algo-bit.h> +#include "i915_drv.h" #include "drm_crtc.h" #include "drm_crtc_helper.h" @@ -111,8 +112,8 @@ struct intel_output { struct intel_crtc { struct drm_crtc base; - int pipe; - int plane; + enum pipe pipe; + enum plane plane; struct drm_gem_object *cursor_bo; uint32_t cursor_addr; u8 lut_r[256], lut_g[256], lut_b[256]; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index dafc0da1c25..98ae3d73577 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -27,6 +27,7 @@ * Jesse Barnes <jesse.barnes@intel.com> */ +#include <acpi/button.h> #include <linux/dmi.h> #include <linux/i2c.h> #include "drmP.h" @@ -295,6 +296,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, goto out; } + /* full screen scale for now */ + if (IS_IGDNG(dev)) + goto out; + /* 965+ wants fuzzy fitting */ if (IS_I965G(dev)) pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) | @@ -322,8 +327,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, * to register description and PRM. * Change the value here to see the borders for debugging */ - I915_WRITE(BCLRPAT_A, 0); - I915_WRITE(BCLRPAT_B, 0); + if (!IS_IGDNG(dev)) { + I915_WRITE(BCLRPAT_A, 0); + I915_WRITE(BCLRPAT_B, 0); + } switch (lvds_priv->fitting_mode) { case DRM_MODE_SCALE_CENTER: @@ -572,7 +579,6 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * settings. */ - /* No panel fitting yet, fixme */ if (IS_IGDNG(dev)) return; @@ -585,15 +591,33 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control); } +/* Some lid devices report incorrect lid status, assume they're connected */ +static const struct dmi_system_id bad_lid_status[] = { + { + .ident = "Aspire One", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"), + }, + }, + { } +}; + /** * Detect the LVDS connection. * - * This always returns CONNECTOR_STATUS_CONNECTED. This connector should only have - * been set up if the LVDS was actually connected anyway. + * Since LVDS doesn't have hotlug, we use the lid as a proxy. Open means + * connected and closed means disconnected. We also send hotplug events as + * needed, using lid status notification from the input layer. */ static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) { - return connector_status_connected; + enum drm_connector_status status = connector_status_connected; + + if (!acpi_lid_open() && !dmi_check_system(bad_lid_status)) + status = connector_status_disconnected; + + return status; } /** @@ -632,6 +656,24 @@ static int intel_lvds_get_modes(struct drm_connector *connector) return 0; } +static int intel_lid_notify(struct notifier_block *nb, unsigned long val, + void *unused) +{ + struct drm_i915_private *dev_priv = + container_of(nb, struct drm_i915_private, lid_notifier); + struct drm_device *dev = dev_priv->dev; + + if (acpi_lid_open() && !dev_priv->suspended) { + mutex_lock(&dev->mode_config.mutex); + drm_helper_resume_force_mode(dev); + mutex_unlock(&dev->mode_config.mutex); + } + + drm_sysfs_hotplug_event(dev_priv->dev); + + return NOTIFY_OK; +} + /** * intel_lvds_destroy - unregister and free LVDS structures * @connector: connector to free @@ -641,10 +683,14 @@ static int intel_lvds_get_modes(struct drm_connector *connector) */ static void intel_lvds_destroy(struct drm_connector *connector) { + struct drm_device *dev = connector->dev; struct intel_output *intel_output = to_intel_output(connector); + struct drm_i915_private *dev_priv = dev->dev_private; if (intel_output->ddc_bus) intel_i2c_destroy(intel_output->ddc_bus); + if (dev_priv->lid_notifier.notifier_call) + acpi_lid_notifier_unregister(&dev_priv->lid_notifier); drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(connector); @@ -1011,6 +1057,11 @@ out: pwm |= PWM_PCH_ENABLE; I915_WRITE(BLC_PWM_PCH_CTL1, pwm); } + dev_priv->lid_notifier.notifier_call = intel_lid_notify; + if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) { + DRM_DEBUG("lid notifier registration failed\n"); + dev_priv->lid_notifier.notifier_call = NULL; + } drm_sysfs_connector_add(connector); return; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 0bf28efcf2c..083bec2e50f 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -135,6 +135,30 @@ struct intel_sdvo_priv { struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2; struct intel_sdvo_dtd save_output_dtd[16]; u32 save_SDVOX; + /* add the property for the SDVO-TV */ + struct drm_property *left_property; + struct drm_property *right_property; + struct drm_property *top_property; + struct drm_property *bottom_property; + struct drm_property *hpos_property; + struct drm_property *vpos_property; + + /* add the property for the SDVO-TV/LVDS */ + struct drm_property *brightness_property; + struct drm_property *contrast_property; + struct drm_property *saturation_property; + struct drm_property *hue_property; + + /* Add variable to record current setting for the above property */ + u32 left_margin, right_margin, top_margin, bottom_margin; + /* this is to get the range of margin.*/ + u32 max_hscan, max_vscan; + u32 max_hpos, cur_hpos; + u32 max_vpos, cur_vpos; + u32 cur_brightness, max_brightness; + u32 cur_contrast, max_contrast; + u32 cur_saturation, max_saturation; + u32 cur_hue, max_hue; }; static bool @@ -281,6 +305,31 @@ static const struct _sdvo_cmd_name { SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS), + /* Add the op code for SDVO enhancements */ + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V), + SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V), /* HDMI op code */ SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE), SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE), @@ -981,7 +1030,7 @@ static void intel_sdvo_set_tv_format(struct intel_output *output) status = intel_sdvo_read_response(output, NULL, 0); if (status != SDVO_CMD_STATUS_SUCCESS) - DRM_DEBUG("%s: Failed to set TV format\n", + DRM_DEBUG_KMS("%s: Failed to set TV format\n", SDVO_NAME(sdvo_priv)); } @@ -1792,6 +1841,45 @@ static int intel_sdvo_get_modes(struct drm_connector *connector) return 1; } +static +void intel_sdvo_destroy_enhance_property(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct drm_device *dev = connector->dev; + + if (sdvo_priv->is_tv) { + if (sdvo_priv->left_property) + drm_property_destroy(dev, sdvo_priv->left_property); + if (sdvo_priv->right_property) + drm_property_destroy(dev, sdvo_priv->right_property); + if (sdvo_priv->top_property) + drm_property_destroy(dev, sdvo_priv->top_property); + if (sdvo_priv->bottom_property) + drm_property_destroy(dev, sdvo_priv->bottom_property); + if (sdvo_priv->hpos_property) + drm_property_destroy(dev, sdvo_priv->hpos_property); + if (sdvo_priv->vpos_property) + drm_property_destroy(dev, sdvo_priv->vpos_property); + } + if (sdvo_priv->is_tv) { + if (sdvo_priv->saturation_property) + drm_property_destroy(dev, + sdvo_priv->saturation_property); + if (sdvo_priv->contrast_property) + drm_property_destroy(dev, + sdvo_priv->contrast_property); + if (sdvo_priv->hue_property) + drm_property_destroy(dev, sdvo_priv->hue_property); + } + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) { + if (sdvo_priv->brightness_property) + drm_property_destroy(dev, + sdvo_priv->brightness_property); + } + return; +} + static void intel_sdvo_destroy(struct drm_connector *connector) { struct intel_output *intel_output = to_intel_output(connector); @@ -1812,6 +1900,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector) drm_property_destroy(connector->dev, sdvo_priv->tv_format_property); + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) + intel_sdvo_destroy_enhance_property(connector); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); @@ -1829,6 +1920,8 @@ intel_sdvo_set_property(struct drm_connector *connector, struct drm_crtc *crtc = encoder->crtc; int ret = 0; bool changed = false; + uint8_t cmd, status; + uint16_t temp_value; ret = drm_connector_property_set_value(connector, property, val); if (ret < 0) @@ -1845,11 +1938,102 @@ intel_sdvo_set_property(struct drm_connector *connector, sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val]; changed = true; - } else { - ret = -EINVAL; - goto out; } + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) { + cmd = 0; + temp_value = val; + if (sdvo_priv->left_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->right_property, val); + if (sdvo_priv->left_margin == temp_value) + goto out; + + sdvo_priv->left_margin = temp_value; + sdvo_priv->right_margin = temp_value; + temp_value = sdvo_priv->max_hscan - + sdvo_priv->left_margin; + cmd = SDVO_CMD_SET_OVERSCAN_H; + } else if (sdvo_priv->right_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->left_property, val); + if (sdvo_priv->right_margin == temp_value) + goto out; + + sdvo_priv->left_margin = temp_value; + sdvo_priv->right_margin = temp_value; + temp_value = sdvo_priv->max_hscan - + sdvo_priv->left_margin; + cmd = SDVO_CMD_SET_OVERSCAN_H; + } else if (sdvo_priv->top_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->bottom_property, val); + if (sdvo_priv->top_margin == temp_value) + goto out; + + sdvo_priv->top_margin = temp_value; + sdvo_priv->bottom_margin = temp_value; + temp_value = sdvo_priv->max_vscan - + sdvo_priv->top_margin; + cmd = SDVO_CMD_SET_OVERSCAN_V; + } else if (sdvo_priv->bottom_property == property) { + drm_connector_property_set_value(connector, + sdvo_priv->top_property, val); + if (sdvo_priv->bottom_margin == temp_value) + goto out; + sdvo_priv->top_margin = temp_value; + sdvo_priv->bottom_margin = temp_value; + temp_value = sdvo_priv->max_vscan - + sdvo_priv->top_margin; + cmd = SDVO_CMD_SET_OVERSCAN_V; + } else if (sdvo_priv->hpos_property == property) { + if (sdvo_priv->cur_hpos == temp_value) + goto out; + + cmd = SDVO_CMD_SET_POSITION_H; + sdvo_priv->cur_hpos = temp_value; + } else if (sdvo_priv->vpos_property == property) { + if (sdvo_priv->cur_vpos == temp_value) + goto out; + + cmd = SDVO_CMD_SET_POSITION_V; + sdvo_priv->cur_vpos = temp_value; + } else if (sdvo_priv->saturation_property == property) { + if (sdvo_priv->cur_saturation == temp_value) + goto out; + + cmd = SDVO_CMD_SET_SATURATION; + sdvo_priv->cur_saturation = temp_value; + } else if (sdvo_priv->contrast_property == property) { + if (sdvo_priv->cur_contrast == temp_value) + goto out; + + cmd = SDVO_CMD_SET_CONTRAST; + sdvo_priv->cur_contrast = temp_value; + } else if (sdvo_priv->hue_property == property) { + if (sdvo_priv->cur_hue == temp_value) + goto out; + + cmd = SDVO_CMD_SET_HUE; + sdvo_priv->cur_hue = temp_value; + } else if (sdvo_priv->brightness_property == property) { + if (sdvo_priv->cur_brightness == temp_value) + goto out; + + cmd = SDVO_CMD_SET_BRIGHTNESS; + sdvo_priv->cur_brightness = temp_value; + } + if (cmd) { + intel_sdvo_write_cmd(intel_output, cmd, &temp_value, 2); + status = intel_sdvo_read_response(intel_output, + NULL, 0); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO command \n"); + return -EINVAL; + } + changed = true; + } + } if (changed && crtc) drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x, crtc->y, crtc->fb); @@ -2090,6 +2274,8 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags) sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1; encoder->encoder_type = DRM_MODE_ENCODER_DAC; connector->connector_type = DRM_MODE_CONNECTOR_VGA; + intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) | + (1 << INTEL_ANALOG_CLONE_BIT); } else if (flags & SDVO_OUTPUT_LVDS0) { sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0; @@ -2176,6 +2362,310 @@ static void intel_sdvo_tv_create_property(struct drm_connector *connector) } +static void intel_sdvo_create_enhance_property(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct intel_sdvo_enhancements_reply sdvo_data; + struct drm_device *dev = connector->dev; + uint8_t status; + uint16_t response, data_value[2]; + + intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, + NULL, 0); + status = intel_sdvo_read_response(intel_output, &sdvo_data, + sizeof(sdvo_data)); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS(" incorrect response is returned\n"); + return; + } + response = *((uint16_t *)&sdvo_data); + if (!response) { + DRM_DEBUG_KMS("No enhancement is supported\n"); + return; + } + if (sdvo_priv->is_tv) { + /* when horizontal overscan is supported, Add the left/right + * property + */ + if (sdvo_data.overscan_h) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_OVERSCAN_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO max " + "h_overscan\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_OVERSCAN_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO h_overscan\n"); + return; + } + sdvo_priv->max_hscan = data_value[0]; + sdvo_priv->left_margin = data_value[0] - response; + sdvo_priv->right_margin = sdvo_priv->left_margin; + sdvo_priv->left_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "left_margin", 2); + sdvo_priv->left_property->values[0] = 0; + sdvo_priv->left_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->left_property, + sdvo_priv->left_margin); + sdvo_priv->right_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "right_margin", 2); + sdvo_priv->right_property->values[0] = 0; + sdvo_priv->right_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->right_property, + sdvo_priv->right_margin); + DRM_DEBUG_KMS("h_overscan: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.overscan_v) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_OVERSCAN_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO max " + "v_overscan\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_OVERSCAN_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO v_overscan\n"); + return; + } + sdvo_priv->max_vscan = data_value[0]; + sdvo_priv->top_margin = data_value[0] - response; + sdvo_priv->bottom_margin = sdvo_priv->top_margin; + sdvo_priv->top_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "top_margin", 2); + sdvo_priv->top_property->values[0] = 0; + sdvo_priv->top_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->top_property, + sdvo_priv->top_margin); + sdvo_priv->bottom_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "bottom_margin", 2); + sdvo_priv->bottom_property->values[0] = 0; + sdvo_priv->bottom_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->bottom_property, + sdvo_priv->bottom_margin); + DRM_DEBUG_KMS("v_overscan: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.position_h) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_POSITION_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max h_pos\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_POSITION_H, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get h_postion\n"); + return; + } + sdvo_priv->max_hpos = data_value[0]; + sdvo_priv->cur_hpos = response; + sdvo_priv->hpos_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "hpos", 2); + sdvo_priv->hpos_property->values[0] = 0; + sdvo_priv->hpos_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->hpos_property, + sdvo_priv->cur_hpos); + DRM_DEBUG_KMS("h_position: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.position_v) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_POSITION_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max v_pos\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_POSITION_V, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get v_postion\n"); + return; + } + sdvo_priv->max_vpos = data_value[0]; + sdvo_priv->cur_vpos = response; + sdvo_priv->vpos_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "vpos", 2); + sdvo_priv->vpos_property->values[0] = 0; + sdvo_priv->vpos_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->vpos_property, + sdvo_priv->cur_vpos); + DRM_DEBUG_KMS("v_position: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + } + if (sdvo_priv->is_tv) { + if (sdvo_data.saturation) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_SATURATION, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max sat\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_SATURATION, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get sat\n"); + return; + } + sdvo_priv->max_saturation = data_value[0]; + sdvo_priv->cur_saturation = response; + sdvo_priv->saturation_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "saturation", 2); + sdvo_priv->saturation_property->values[0] = 0; + sdvo_priv->saturation_property->values[1] = + data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->saturation_property, + sdvo_priv->cur_saturation); + DRM_DEBUG_KMS("saturation: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.contrast) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_CONTRAST, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max contrast\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_CONTRAST, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get contrast\n"); + return; + } + sdvo_priv->max_contrast = data_value[0]; + sdvo_priv->cur_contrast = response; + sdvo_priv->contrast_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "contrast", 2); + sdvo_priv->contrast_property->values[0] = 0; + sdvo_priv->contrast_property->values[1] = data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->contrast_property, + sdvo_priv->cur_contrast); + DRM_DEBUG_KMS("contrast: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + if (sdvo_data.hue) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_HUE, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max hue\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_HUE, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get hue\n"); + return; + } + sdvo_priv->max_hue = data_value[0]; + sdvo_priv->cur_hue = response; + sdvo_priv->hue_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "hue", 2); + sdvo_priv->hue_property->values[0] = 0; + sdvo_priv->hue_property->values[1] = + data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->hue_property, + sdvo_priv->cur_hue); + DRM_DEBUG_KMS("hue: max %d, default %d, current %d\n", + data_value[0], data_value[1], response); + } + } + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) { + if (sdvo_data.brightness) { + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_MAX_BRIGHTNESS, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &data_value, 4); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO Max bright\n"); + return; + } + intel_sdvo_write_cmd(intel_output, + SDVO_CMD_GET_BRIGHTNESS, NULL, 0); + status = intel_sdvo_read_response(intel_output, + &response, 2); + if (status != SDVO_CMD_STATUS_SUCCESS) { + DRM_DEBUG_KMS("Incorrect SDVO get brigh\n"); + return; + } + sdvo_priv->max_brightness = data_value[0]; + sdvo_priv->cur_brightness = response; + sdvo_priv->brightness_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "brightness", 2); + sdvo_priv->brightness_property->values[0] = 0; + sdvo_priv->brightness_property->values[1] = + data_value[0]; + drm_connector_attach_property(connector, + sdvo_priv->brightness_property, + sdvo_priv->cur_brightness); + DRM_DEBUG_KMS("brightness: max %d, " + "default %d, current %d\n", + data_value[0], data_value[1], response); + } + } + return; +} + bool intel_sdvo_init(struct drm_device *dev, int output_device) { struct drm_connector *connector; @@ -2264,6 +2754,10 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc); if (sdvo_priv->is_tv) intel_sdvo_tv_create_property(connector); + + if (sdvo_priv->is_tv || sdvo_priv->is_lvds) + intel_sdvo_create_enhance_property(connector); + drm_sysfs_connector_add(connector); intel_sdvo_select_ddc_bus(sdvo_priv); diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index ed7711d11ae..6857560144b 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -325,34 +325,6 @@ config SENSORS_F75375S This driver can also be built as a module. If so, the module will be called f75375s. -config SENSORS_FSCHER - tristate "FSC Hermes (DEPRECATED)" - depends on X86 && I2C - help - This driver is DEPRECATED please use the new merged fschmd - ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver - instead. - - If you say yes here you get support for Fujitsu Siemens - Computers Hermes sensor chips. - - This driver can also be built as a module. If so, the module - will be called fscher. - -config SENSORS_FSCPOS - tristate "FSC Poseidon (DEPRECATED)" - depends on X86 && I2C - help - This driver is DEPRECATED please use the new merged fschmd - ("FSC Poseidon, Scylla, Hermes, Heimdall and Heracles") driver - instead. - - If you say yes here you get support for Fujitsu Siemens - Computers Poseidon sensor chips. - - This driver can also be built as a module. If so, the module - will be called fscpos. - config SENSORS_FSCHMD tristate "Fujitsu Siemens Computers sensor chips" depends on X86 && I2C @@ -401,12 +373,12 @@ config SENSORS_GL520SM will be called gl520sm. config SENSORS_CORETEMP - tristate "Intel Core (2) Duo/Solo temperature sensor" + tristate "Intel Core/Core2/Atom temperature sensor" depends on X86 && EXPERIMENTAL help If you say yes here you get support for the temperature - sensor inside your CPU. Supported all are all known variants - of Intel Core family. + sensor inside your CPU. Most of the family 6 CPUs + are supported. Check documentation/driver for details. config SENSORS_IBMAEM tristate "IBM Active Energy Manager temperature/power sensors and control" diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index bcf73a9bb61..9f46cb019cc 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -42,9 +42,7 @@ obj-$(CONFIG_SENSORS_DS1621) += ds1621.o obj-$(CONFIG_SENSORS_F71805F) += f71805f.o obj-$(CONFIG_SENSORS_F71882FG) += f71882fg.o obj-$(CONFIG_SENSORS_F75375S) += f75375s.o -obj-$(CONFIG_SENSORS_FSCHER) += fscher.o obj-$(CONFIG_SENSORS_FSCHMD) += fschmd.o -obj-$(CONFIG_SENSORS_FSCPOS) += fscpos.o obj-$(CONFIG_SENSORS_G760A) += g760a.o obj-$(CONFIG_SENSORS_GL518SM) += gl518sm.o obj-$(CONFIG_SENSORS_GL520SM) += gl520sm.o diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 789441830cd..56905955352 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -37,6 +37,7 @@ #define ADM1031_REG_PWM (0x22) #define ADM1031_REG_FAN_MIN(nr) (0x10 + (nr)) +#define ADM1031_REG_TEMP_OFFSET(nr) (0x0d + (nr)) #define ADM1031_REG_TEMP_MAX(nr) (0x14 + 4 * (nr)) #define ADM1031_REG_TEMP_MIN(nr) (0x15 + 4 * (nr)) #define ADM1031_REG_TEMP_CRIT(nr) (0x16 + 4 * (nr)) @@ -93,6 +94,7 @@ struct adm1031_data { u8 auto_temp_min[3]; u8 auto_temp_off[3]; u8 auto_temp_max[3]; + s8 temp_offset[3]; s8 temp_min[3]; s8 temp_max[3]; s8 temp_crit[3]; @@ -145,6 +147,10 @@ adm1031_write_value(struct i2c_client *client, u8 reg, unsigned int value) #define TEMP_FROM_REG_EXT(val, ext) (TEMP_FROM_REG(val) + (ext) * 125) +#define TEMP_OFFSET_TO_REG(val) (TEMP_TO_REG(val) & 0x8f) +#define TEMP_OFFSET_FROM_REG(val) TEMP_FROM_REG((val) < 0 ? \ + (val) | 0x70 : (val)) + #define FAN_FROM_REG(reg, div) ((reg) ? (11250 * 60) / ((reg) * (div)) : 0) static int FAN_TO_REG(int reg, int div) @@ -585,6 +591,14 @@ static ssize_t show_temp(struct device *dev, (((data->ext_temp[nr] >> ((nr - 1) * 3)) & 7)); return sprintf(buf, "%d\n", TEMP_FROM_REG_EXT(data->temp[nr], ext)); } +static ssize_t show_temp_offset(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int nr = to_sensor_dev_attr(attr)->index; + struct adm1031_data *data = adm1031_update_device(dev); + return sprintf(buf, "%d\n", + TEMP_OFFSET_FROM_REG(data->temp_offset[nr])); +} static ssize_t show_temp_min(struct device *dev, struct device_attribute *attr, char *buf) { @@ -606,6 +620,24 @@ static ssize_t show_temp_crit(struct device *dev, struct adm1031_data *data = adm1031_update_device(dev); return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_crit[nr])); } +static ssize_t set_temp_offset(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adm1031_data *data = i2c_get_clientdata(client); + int nr = to_sensor_dev_attr(attr)->index; + int val; + + val = simple_strtol(buf, NULL, 10); + val = SENSORS_LIMIT(val, -15000, 15000); + mutex_lock(&data->update_lock); + data->temp_offset[nr] = TEMP_OFFSET_TO_REG(val); + adm1031_write_value(client, ADM1031_REG_TEMP_OFFSET(nr), + data->temp_offset[nr]); + mutex_unlock(&data->update_lock); + return count; +} static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -661,6 +693,8 @@ static ssize_t set_temp_crit(struct device *dev, struct device_attribute *attr, #define temp_reg(offset) \ static SENSOR_DEVICE_ATTR(temp##offset##_input, S_IRUGO, \ show_temp, NULL, offset - 1); \ +static SENSOR_DEVICE_ATTR(temp##offset##_offset, S_IRUGO | S_IWUSR, \ + show_temp_offset, set_temp_offset, offset - 1); \ static SENSOR_DEVICE_ATTR(temp##offset##_min, S_IRUGO | S_IWUSR, \ show_temp_min, set_temp_min, offset - 1); \ static SENSOR_DEVICE_ATTR(temp##offset##_max, S_IRUGO | S_IWUSR, \ @@ -714,6 +748,7 @@ static struct attribute *adm1031_attributes[] = { &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_auto_fan1_channel.dev_attr.attr, &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_offset.dev_attr.attr, &sensor_dev_attr_temp1_min.dev_attr.attr, &sensor_dev_attr_temp1_min_alarm.dev_attr.attr, &sensor_dev_attr_temp1_max.dev_attr.attr, @@ -721,6 +756,7 @@ static struct attribute *adm1031_attributes[] = { &sensor_dev_attr_temp1_crit.dev_attr.attr, &sensor_dev_attr_temp1_crit_alarm.dev_attr.attr, &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_offset.dev_attr.attr, &sensor_dev_attr_temp2_min.dev_attr.attr, &sensor_dev_attr_temp2_min_alarm.dev_attr.attr, &sensor_dev_attr_temp2_max.dev_attr.attr, @@ -757,6 +793,7 @@ static struct attribute *adm1031_attributes_opt[] = { &sensor_dev_attr_pwm2.dev_attr.attr, &sensor_dev_attr_auto_fan2_channel.dev_attr.attr, &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_offset.dev_attr.attr, &sensor_dev_attr_temp3_min.dev_attr.attr, &sensor_dev_attr_temp3_min_alarm.dev_attr.attr, &sensor_dev_attr_temp3_max.dev_attr.attr, @@ -937,6 +974,9 @@ static struct adm1031_data *adm1031_update_device(struct device *dev) } data->temp[chan] = newh; + data->temp_offset[chan] = + adm1031_read_value(client, + ADM1031_REG_TEMP_OFFSET(chan)); data->temp_min[chan] = adm1031_read_value(client, ADM1031_REG_TEMP_MIN(chan)); diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 972cf4ba963..caef39cda8c 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -157,17 +157,26 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device * /* The 100C is default for both mobile and non mobile CPUs */ int tjmax = 100000; - int ismobile = 1; + int tjmax_ee = 85000; + int usemsr_ee = 1; int err; u32 eax, edx; /* Early chips have no MSR for TjMax */ if ((c->x86_model == 0xf) && (c->x86_mask < 4)) { - ismobile = 0; + usemsr_ee = 0; } - if ((c->x86_model > 0xe) && (ismobile)) { + /* Atoms seems to have TjMax at 90C */ + + if (c->x86_model == 0x1c) { + usemsr_ee = 0; + tjmax = 90000; + } + + if ((c->x86_model > 0xe) && (usemsr_ee)) { + u8 platform_id; /* Now we can detect the mobile CPU using Intel provided table http://softwarecommunity.intel.com/Wiki/Mobility/720.htm @@ -179,13 +188,29 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device * dev_warn(dev, "Unable to access MSR 0x17, assuming desktop" " CPU\n"); - ismobile = 0; - } else if (!(eax & 0x10000000)) { - ismobile = 0; + usemsr_ee = 0; + } else if (c->x86_model < 0x17 && !(eax & 0x10000000)) { + /* Trust bit 28 up to Penryn, I could not find any + documentation on that; if you happen to know + someone at Intel please ask */ + usemsr_ee = 0; + } else { + /* Platform ID bits 52:50 (EDX starts at bit 32) */ + platform_id = (edx >> 18) & 0x7; + + /* Mobile Penryn CPU seems to be platform ID 7 or 5 + (guesswork) */ + if ((c->x86_model == 0x17) && + ((platform_id == 5) || (platform_id == 7))) { + /* If MSR EE bit is set, set it to 90 degrees C, + otherwise 105 degrees C */ + tjmax_ee = 90000; + tjmax = 105000; + } } } - if (ismobile || c->x86_model == 0x1c) { + if (usemsr_ee) { err = rdmsr_safe_on_cpu(id, 0xee, &eax, &edx); if (err) { @@ -193,9 +218,11 @@ static int __devinit adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device * "Unable to access MSR 0xEE, for Tjmax, left" " at default"); } else if (eax & 0x40000000) { - tjmax = 85000; + tjmax = tjmax_ee; } - } else { + /* if we dont use msr EE it means we are desktop CPU (with exeception + of Atom) */ + } else if (tjmax == 100000) { dev_warn(dev, "Using relative temperature scale!\n"); } @@ -248,9 +275,9 @@ static int __devinit coretemp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, data); /* read the still undocumented IA32_TEMPERATURE_TARGET it exists - on older CPUs but not in this register */ + on older CPUs but not in this register, Atoms don't have it either */ - if (c->x86_model > 0xe) { + if ((c->x86_model > 0xe) && (c->x86_model != 0x1c)) { err = rdmsr_safe_on_cpu(data->id, 0x1a2, &eax, &edx); if (err) { dev_warn(&pdev->dev, "Unable to read" @@ -413,11 +440,15 @@ static int __init coretemp_init(void) for_each_online_cpu(i) { struct cpuinfo_x86 *c = &cpu_data(i); - /* check if family 6, models 0xe, 0xf, 0x16, 0x17, 0x1A */ + /* check if family 6, models 0xe (Pentium M DC), + 0xf (Core 2 DC 65nm), 0x16 (Core 2 SC 65nm), + 0x17 (Penryn 45nm), 0x1a (Nehalem), 0x1c (Atom), + 0x1e (Lynnfield) */ if ((c->cpuid_level < 0) || (c->x86 != 0x6) || !((c->x86_model == 0xe) || (c->x86_model == 0xf) || (c->x86_model == 0x16) || (c->x86_model == 0x17) || - (c->x86_model == 0x1A) || (c->x86_model == 0x1c))) { + (c->x86_model == 0x1a) || (c->x86_model == 0x1c) || + (c->x86_model == 0x1e))) { /* supported CPU not found, but report the unknown family 6 CPU */ diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c deleted file mode 100644 index 12c70e402cb..00000000000 --- a/drivers/hwmon/fscher.c +++ /dev/null @@ -1,680 +0,0 @@ -/* - * fscher.c - Part of lm_sensors, Linux kernel modules for hardware - * monitoring - * Copyright (C) 2003, 2004 Reinhard Nissl <rnissl@gmx.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * fujitsu siemens hermes chip, - * module based on fscpos.c - * Copyright (C) 2000 Hermann Jung <hej@odn.de> - * Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl> - * and Philip Edelbrock <phil@netroedge.com> - */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/jiffies.h> -#include <linux/i2c.h> -#include <linux/hwmon.h> -#include <linux/err.h> -#include <linux/mutex.h> -#include <linux/sysfs.h> - -/* - * Addresses to scan - */ - -static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END }; - -/* - * Insmod parameters - */ - -I2C_CLIENT_INSMOD_1(fscher); - -/* - * The FSCHER registers - */ - -/* chip identification */ -#define FSCHER_REG_IDENT_0 0x00 -#define FSCHER_REG_IDENT_1 0x01 -#define FSCHER_REG_IDENT_2 0x02 -#define FSCHER_REG_REVISION 0x03 - -/* global control and status */ -#define FSCHER_REG_EVENT_STATE 0x04 -#define FSCHER_REG_CONTROL 0x05 - -/* watchdog */ -#define FSCHER_REG_WDOG_PRESET 0x28 -#define FSCHER_REG_WDOG_STATE 0x23 -#define FSCHER_REG_WDOG_CONTROL 0x21 - -/* fan 0 */ -#define FSCHER_REG_FAN0_MIN 0x55 -#define FSCHER_REG_FAN0_ACT 0x0e -#define FSCHER_REG_FAN0_STATE 0x0d -#define FSCHER_REG_FAN0_RIPPLE 0x0f - -/* fan 1 */ -#define FSCHER_REG_FAN1_MIN 0x65 -#define FSCHER_REG_FAN1_ACT 0x6b -#define FSCHER_REG_FAN1_STATE 0x62 -#define FSCHER_REG_FAN1_RIPPLE 0x6f - -/* fan 2 */ -#define FSCHER_REG_FAN2_MIN 0xb5 -#define FSCHER_REG_FAN2_ACT 0xbb -#define FSCHER_REG_FAN2_STATE 0xb2 -#define FSCHER_REG_FAN2_RIPPLE 0xbf - -/* voltage supervision */ -#define FSCHER_REG_VOLT_12 0x45 -#define FSCHER_REG_VOLT_5 0x42 -#define FSCHER_REG_VOLT_BATT 0x48 - -/* temperature 0 */ -#define FSCHER_REG_TEMP0_ACT 0x64 -#define FSCHER_REG_TEMP0_STATE 0x71 - -/* temperature 1 */ -#define FSCHER_REG_TEMP1_ACT 0x32 -#define FSCHER_REG_TEMP1_STATE 0x81 - -/* temperature 2 */ -#define FSCHER_REG_TEMP2_ACT 0x35 -#define FSCHER_REG_TEMP2_STATE 0x91 - -/* - * Functions declaration - */ - -static int fscher_probe(struct i2c_client *client, - const struct i2c_device_id *id); -static int fscher_detect(struct i2c_client *client, int kind, - struct i2c_board_info *info); -static int fscher_remove(struct i2c_client *client); -static struct fscher_data *fscher_update_device(struct device *dev); -static void fscher_init_client(struct i2c_client *client); - -static int fscher_read_value(struct i2c_client *client, u8 reg); -static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value); - -/* - * Driver data (common to all clients) - */ - -static const struct i2c_device_id fscher_id[] = { - { "fscher", fscher }, - { } -}; - -static struct i2c_driver fscher_driver = { - .class = I2C_CLASS_HWMON, - .driver = { - .name = "fscher", - }, - .probe = fscher_probe, - .remove = fscher_remove, - .id_table = fscher_id, - .detect = fscher_detect, - .address_data = &addr_data, -}; - -/* - * Client data (each client gets its own) - */ - -struct fscher_data { - struct device *hwmon_dev; - struct mutex update_lock; - char valid; /* zero until following fields are valid */ - unsigned long last_updated; /* in jiffies */ - - /* register values */ - u8 revision; /* revision of chip */ - u8 global_event; /* global event status */ - u8 global_control; /* global control register */ - u8 watchdog[3]; /* watchdog */ - u8 volt[3]; /* 12, 5, battery voltage */ - u8 temp_act[3]; /* temperature */ - u8 temp_status[3]; /* status of sensor */ - u8 fan_act[3]; /* fans revolutions per second */ - u8 fan_status[3]; /* fan status */ - u8 fan_min[3]; /* fan min value for rps */ - u8 fan_ripple[3]; /* divider for rps */ -}; - -/* - * Sysfs stuff - */ - -#define sysfs_r(kind, sub, offset, reg) \ -static ssize_t show_##kind##sub (struct fscher_data *, char *, int); \ -static ssize_t show_##kind##offset##sub (struct device *, struct device_attribute *attr, char *); \ -static ssize_t show_##kind##offset##sub (struct device *dev, struct device_attribute *attr, char *buf) \ -{ \ - struct fscher_data *data = fscher_update_device(dev); \ - return show_##kind##sub(data, buf, (offset)); \ -} - -#define sysfs_w(kind, sub, offset, reg) \ -static ssize_t set_##kind##sub (struct i2c_client *, struct fscher_data *, const char *, size_t, int, int); \ -static ssize_t set_##kind##offset##sub (struct device *, struct device_attribute *attr, const char *, size_t); \ -static ssize_t set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) \ -{ \ - struct i2c_client *client = to_i2c_client(dev); \ - struct fscher_data *data = i2c_get_clientdata(client); \ - return set_##kind##sub(client, data, buf, count, (offset), reg); \ -} - -#define sysfs_rw_n(kind, sub, offset, reg) \ -sysfs_r(kind, sub, offset, reg) \ -sysfs_w(kind, sub, offset, reg) \ -static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, show_##kind##offset##sub, set_##kind##offset##sub); - -#define sysfs_rw(kind, sub, reg) \ -sysfs_r(kind, sub, 0, reg) \ -sysfs_w(kind, sub, 0, reg) \ -static DEVICE_ATTR(kind##sub, S_IRUGO | S_IWUSR, show_##kind##0##sub, set_##kind##0##sub); - -#define sysfs_ro_n(kind, sub, offset, reg) \ -sysfs_r(kind, sub, offset, reg) \ -static DEVICE_ATTR(kind##offset##sub, S_IRUGO, show_##kind##offset##sub, NULL); - -#define sysfs_ro(kind, sub, reg) \ -sysfs_r(kind, sub, 0, reg) \ -static DEVICE_ATTR(kind, S_IRUGO, show_##kind##0##sub, NULL); - -#define sysfs_fan(offset, reg_status, reg_min, reg_ripple, reg_act) \ -sysfs_rw_n(pwm, , offset, reg_min) \ -sysfs_rw_n(fan, _status, offset, reg_status) \ -sysfs_rw_n(fan, _div , offset, reg_ripple) \ -sysfs_ro_n(fan, _input , offset, reg_act) - -#define sysfs_temp(offset, reg_status, reg_act) \ -sysfs_rw_n(temp, _status, offset, reg_status) \ -sysfs_ro_n(temp, _input , offset, reg_act) - -#define sysfs_in(offset, reg_act) \ -sysfs_ro_n(in, _input, offset, reg_act) - -#define sysfs_revision(reg_revision) \ -sysfs_ro(revision, , reg_revision) - -#define sysfs_alarms(reg_events) \ -sysfs_ro(alarms, , reg_events) - -#define sysfs_control(reg_control) \ -sysfs_rw(control, , reg_control) - -#define sysfs_watchdog(reg_control, reg_status, reg_preset) \ -sysfs_rw(watchdog, _control, reg_control) \ -sysfs_rw(watchdog, _status , reg_status) \ -sysfs_rw(watchdog, _preset , reg_preset) - -sysfs_fan(1, FSCHER_REG_FAN0_STATE, FSCHER_REG_FAN0_MIN, - FSCHER_REG_FAN0_RIPPLE, FSCHER_REG_FAN0_ACT) -sysfs_fan(2, FSCHER_REG_FAN1_STATE, FSCHER_REG_FAN1_MIN, - FSCHER_REG_FAN1_RIPPLE, FSCHER_REG_FAN1_ACT) -sysfs_fan(3, FSCHER_REG_FAN2_STATE, FSCHER_REG_FAN2_MIN, - FSCHER_REG_FAN2_RIPPLE, FSCHER_REG_FAN2_ACT) - -sysfs_temp(1, FSCHER_REG_TEMP0_STATE, FSCHER_REG_TEMP0_ACT) -sysfs_temp(2, FSCHER_REG_TEMP1_STATE, FSCHER_REG_TEMP1_ACT) -sysfs_temp(3, FSCHER_REG_TEMP2_STATE, FSCHER_REG_TEMP2_ACT) - -sysfs_in(0, FSCHER_REG_VOLT_12) -sysfs_in(1, FSCHER_REG_VOLT_5) -sysfs_in(2, FSCHER_REG_VOLT_BATT) - -sysfs_revision(FSCHER_REG_REVISION) -sysfs_alarms(FSCHER_REG_EVENTS) -sysfs_control(FSCHER_REG_CONTROL) -sysfs_watchdog(FSCHER_REG_WDOG_CONTROL, FSCHER_REG_WDOG_STATE, FSCHER_REG_WDOG_PRESET) - -static struct attribute *fscher_attributes[] = { - &dev_attr_revision.attr, - &dev_attr_alarms.attr, - &dev_attr_control.attr, - - &dev_attr_watchdog_status.attr, - &dev_attr_watchdog_control.attr, - &dev_attr_watchdog_preset.attr, - - &dev_attr_in0_input.attr, - &dev_attr_in1_input.attr, - &dev_attr_in2_input.attr, - - &dev_attr_fan1_status.attr, - &dev_attr_fan1_div.attr, - &dev_attr_fan1_input.attr, - &dev_attr_pwm1.attr, - &dev_attr_fan2_status.attr, - &dev_attr_fan2_div.attr, - &dev_attr_fan2_input.attr, - &dev_attr_pwm2.attr, - &dev_attr_fan3_status.attr, - &dev_attr_fan3_div.attr, - &dev_attr_fan3_input.attr, - &dev_attr_pwm3.attr, - - &dev_attr_temp1_status.attr, - &dev_attr_temp1_input.attr, - &dev_attr_temp2_status.attr, - &dev_attr_temp2_input.attr, - &dev_attr_temp3_status.attr, - &dev_attr_temp3_input.attr, - NULL -}; - -static const struct attribute_group fscher_group = { - .attrs = fscher_attributes, -}; - -/* - * Real code - */ - -/* Return 0 if detection is successful, -ENODEV otherwise */ -static int fscher_detect(struct i2c_client *new_client, int kind, - struct i2c_board_info *info) -{ - struct i2c_adapter *adapter = new_client->adapter; - - if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - return -ENODEV; - - /* Do the remaining detection unless force or force_fscher parameter */ - if (kind < 0) { - if ((i2c_smbus_read_byte_data(new_client, - FSCHER_REG_IDENT_0) != 0x48) /* 'H' */ - || (i2c_smbus_read_byte_data(new_client, - FSCHER_REG_IDENT_1) != 0x45) /* 'E' */ - || (i2c_smbus_read_byte_data(new_client, - FSCHER_REG_IDENT_2) != 0x52)) /* 'R' */ - return -ENODEV; - } - - strlcpy(info->type, "fscher", I2C_NAME_SIZE); - - return 0; -} - -static int fscher_probe(struct i2c_client *new_client, - const struct i2c_device_id *id) -{ - struct fscher_data *data; - int err; - - data = kzalloc(sizeof(struct fscher_data), GFP_KERNEL); - if (!data) { - err = -ENOMEM; - goto exit; - } - - i2c_set_clientdata(new_client, data); - data->valid = 0; - mutex_init(&data->update_lock); - - fscher_init_client(new_client); - - /* Register sysfs hooks */ - if ((err = sysfs_create_group(&new_client->dev.kobj, &fscher_group))) - goto exit_free; - - data->hwmon_dev = hwmon_device_register(&new_client->dev); - if (IS_ERR(data->hwmon_dev)) { - err = PTR_ERR(data->hwmon_dev); - goto exit_remove_files; - } - - return 0; - -exit_remove_files: - sysfs_remove_group(&new_client->dev.kobj, &fscher_group); -exit_free: - kfree(data); -exit: - return err; -} - -static int fscher_remove(struct i2c_client *client) -{ - struct fscher_data *data = i2c_get_clientdata(client); - - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&client->dev.kobj, &fscher_group); - - kfree(data); - return 0; -} - -static int fscher_read_value(struct i2c_client *client, u8 reg) -{ - dev_dbg(&client->dev, "read reg 0x%02x\n", reg); - - return i2c_smbus_read_byte_data(client, reg); -} - -static int fscher_write_value(struct i2c_client *client, u8 reg, u8 value) -{ - dev_dbg(&client->dev, "write reg 0x%02x, val 0x%02x\n", - reg, value); - - return i2c_smbus_write_byte_data(client, reg, value); -} - -/* Called when we have found a new FSC Hermes. */ -static void fscher_init_client(struct i2c_client *client) -{ - struct fscher_data *data = i2c_get_clientdata(client); - - /* Read revision from chip */ - data->revision = fscher_read_value(client, FSCHER_REG_REVISION); -} - -static struct fscher_data *fscher_update_device(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - struct fscher_data *data = i2c_get_clientdata(client); - - mutex_lock(&data->update_lock); - - if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) { - - dev_dbg(&client->dev, "Starting fscher update\n"); - - data->temp_act[0] = fscher_read_value(client, FSCHER_REG_TEMP0_ACT); - data->temp_act[1] = fscher_read_value(client, FSCHER_REG_TEMP1_ACT); - data->temp_act[2] = fscher_read_value(client, FSCHER_REG_TEMP2_ACT); - data->temp_status[0] = fscher_read_value(client, FSCHER_REG_TEMP0_STATE); - data->temp_status[1] = fscher_read_value(client, FSCHER_REG_TEMP1_STATE); - data->temp_status[2] = fscher_read_value(client, FSCHER_REG_TEMP2_STATE); - - data->volt[0] = fscher_read_value(client, FSCHER_REG_VOLT_12); - data->volt[1] = fscher_read_value(client, FSCHER_REG_VOLT_5); - data->volt[2] = fscher_read_value(client, FSCHER_REG_VOLT_BATT); - - data->fan_act[0] = fscher_read_value(client, FSCHER_REG_FAN0_ACT); - data->fan_act[1] = fscher_read_value(client, FSCHER_REG_FAN1_ACT); - data->fan_act[2] = fscher_read_value(client, FSCHER_REG_FAN2_ACT); - data->fan_status[0] = fscher_read_value(client, FSCHER_REG_FAN0_STATE); - data->fan_status[1] = fscher_read_value(client, FSCHER_REG_FAN1_STATE); - data->fan_status[2] = fscher_read_value(client, FSCHER_REG_FAN2_STATE); - data->fan_min[0] = fscher_read_value(client, FSCHER_REG_FAN0_MIN); - data->fan_min[1] = fscher_read_value(client, FSCHER_REG_FAN1_MIN); - data->fan_min[2] = fscher_read_value(client, FSCHER_REG_FAN2_MIN); - data->fan_ripple[0] = fscher_read_value(client, FSCHER_REG_FAN0_RIPPLE); - data->fan_ripple[1] = fscher_read_value(client, FSCHER_REG_FAN1_RIPPLE); - data->fan_ripple[2] = fscher_read_value(client, FSCHER_REG_FAN2_RIPPLE); - - data->watchdog[0] = fscher_read_value(client, FSCHER_REG_WDOG_PRESET); - data->watchdog[1] = fscher_read_value(client, FSCHER_REG_WDOG_STATE); - data->watchdog[2] = fscher_read_value(client, FSCHER_REG_WDOG_CONTROL); - - data->global_event = fscher_read_value(client, FSCHER_REG_EVENT_STATE); - data->global_control = fscher_read_value(client, - FSCHER_REG_CONTROL); - - data->last_updated = jiffies; - data->valid = 1; - } - - mutex_unlock(&data->update_lock); - - return data; -} - - - -#define FAN_INDEX_FROM_NUM(nr) ((nr) - 1) - -static ssize_t set_fan_status(struct i2c_client *client, struct fscher_data *data, - const char *buf, size_t count, int nr, int reg) -{ - /* bits 0..1, 3..7 reserved => mask with 0x04 */ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0x04; - - mutex_lock(&data->update_lock); - data->fan_status[FAN_INDEX_FROM_NUM(nr)] &= ~v; - fscher_write_value(client, reg, v); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_fan_status(struct fscher_data *data, char *buf, int nr) -{ - /* bits 0..1, 3..7 reserved => mask with 0x04 */ - return sprintf(buf, "%u\n", data->fan_status[FAN_INDEX_FROM_NUM(nr)] & 0x04); -} - -static ssize_t set_pwm(struct i2c_client *client, struct fscher_data *data, - const char *buf, size_t count, int nr, int reg) -{ - unsigned long v = simple_strtoul(buf, NULL, 10); - - mutex_lock(&data->update_lock); - data->fan_min[FAN_INDEX_FROM_NUM(nr)] = v > 0xff ? 0xff : v; - fscher_write_value(client, reg, data->fan_min[FAN_INDEX_FROM_NUM(nr)]); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_pwm(struct fscher_data *data, char *buf, int nr) -{ - return sprintf(buf, "%u\n", data->fan_min[FAN_INDEX_FROM_NUM(nr)]); -} - -static ssize_t set_fan_div(struct i2c_client *client, struct fscher_data *data, - const char *buf, size_t count, int nr, int reg) -{ - /* supported values: 2, 4, 8 */ - unsigned long v = simple_strtoul(buf, NULL, 10); - - switch (v) { - case 2: v = 1; break; - case 4: v = 2; break; - case 8: v = 3; break; - default: - dev_err(&client->dev, "fan_div value %ld not " - "supported. Choose one of 2, 4 or 8!\n", v); - return -EINVAL; - } - - mutex_lock(&data->update_lock); - - /* bits 2..7 reserved => mask with 0x03 */ - data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] &= ~0x03; - data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] |= v; - - fscher_write_value(client, reg, data->fan_ripple[FAN_INDEX_FROM_NUM(nr)]); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_fan_div(struct fscher_data *data, char *buf, int nr) -{ - /* bits 2..7 reserved => mask with 0x03 */ - return sprintf(buf, "%u\n", 1 << (data->fan_ripple[FAN_INDEX_FROM_NUM(nr)] & 0x03)); -} - -#define RPM_FROM_REG(val) (val*60) - -static ssize_t show_fan_input (struct fscher_data *data, char *buf, int nr) -{ - return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[FAN_INDEX_FROM_NUM(nr)])); -} - - - -#define TEMP_INDEX_FROM_NUM(nr) ((nr) - 1) - -static ssize_t set_temp_status(struct i2c_client *client, struct fscher_data *data, - const char *buf, size_t count, int nr, int reg) -{ - /* bits 2..7 reserved, 0 read only => mask with 0x02 */ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02; - - mutex_lock(&data->update_lock); - data->temp_status[TEMP_INDEX_FROM_NUM(nr)] &= ~v; - fscher_write_value(client, reg, v); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_temp_status(struct fscher_data *data, char *buf, int nr) -{ - /* bits 2..7 reserved => mask with 0x03 */ - return sprintf(buf, "%u\n", data->temp_status[TEMP_INDEX_FROM_NUM(nr)] & 0x03); -} - -#define TEMP_FROM_REG(val) (((val) - 128) * 1000) - -static ssize_t show_temp_input(struct fscher_data *data, char *buf, int nr) -{ - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[TEMP_INDEX_FROM_NUM(nr)])); -} - -/* - * The final conversion is specified in sensors.conf, as it depends on - * mainboard specific values. We export the registers contents as - * pseudo-hundredths-of-Volts (range 0V - 2.55V). Not that it makes much - * sense per se, but it minimizes the conversions count and keeps the - * values within a usual range. - */ -#define VOLT_FROM_REG(val) ((val) * 10) - -static ssize_t show_in_input(struct fscher_data *data, char *buf, int nr) -{ - return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[nr])); -} - - - -static ssize_t show_revision(struct fscher_data *data, char *buf, int nr) -{ - return sprintf(buf, "%u\n", data->revision); -} - - - -static ssize_t show_alarms(struct fscher_data *data, char *buf, int nr) -{ - /* bits 2, 5..6 reserved => mask with 0x9b */ - return sprintf(buf, "%u\n", data->global_event & 0x9b); -} - - - -static ssize_t set_control(struct i2c_client *client, struct fscher_data *data, - const char *buf, size_t count, int nr, int reg) -{ - /* bits 1..7 reserved => mask with 0x01 */ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0x01; - - mutex_lock(&data->update_lock); - data->global_control = v; - fscher_write_value(client, reg, v); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_control(struct fscher_data *data, char *buf, int nr) -{ - /* bits 1..7 reserved => mask with 0x01 */ - return sprintf(buf, "%u\n", data->global_control & 0x01); -} - - - -static ssize_t set_watchdog_control(struct i2c_client *client, struct - fscher_data *data, const char *buf, size_t count, - int nr, int reg) -{ - /* bits 0..3 reserved => mask with 0xf0 */ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0; - - mutex_lock(&data->update_lock); - data->watchdog[2] &= ~0xf0; - data->watchdog[2] |= v; - fscher_write_value(client, reg, data->watchdog[2]); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_watchdog_control(struct fscher_data *data, char *buf, int nr) -{ - /* bits 0..3 reserved, bit 5 write only => mask with 0xd0 */ - return sprintf(buf, "%u\n", data->watchdog[2] & 0xd0); -} - -static ssize_t set_watchdog_status(struct i2c_client *client, struct fscher_data *data, - const char *buf, size_t count, int nr, int reg) -{ - /* bits 0, 2..7 reserved => mask with 0x02 */ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02; - - mutex_lock(&data->update_lock); - data->watchdog[1] &= ~v; - fscher_write_value(client, reg, v); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_watchdog_status(struct fscher_data *data, char *buf, int nr) -{ - /* bits 0, 2..7 reserved => mask with 0x02 */ - return sprintf(buf, "%u\n", data->watchdog[1] & 0x02); -} - -static ssize_t set_watchdog_preset(struct i2c_client *client, struct fscher_data *data, - const char *buf, size_t count, int nr, int reg) -{ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff; - - mutex_lock(&data->update_lock); - data->watchdog[0] = v; - fscher_write_value(client, reg, data->watchdog[0]); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_watchdog_preset(struct fscher_data *data, char *buf, int nr) -{ - return sprintf(buf, "%u\n", data->watchdog[0]); -} - -static int __init sensors_fscher_init(void) -{ - return i2c_add_driver(&fscher_driver); -} - -static void __exit sensors_fscher_exit(void) -{ - i2c_del_driver(&fscher_driver); -} - -MODULE_AUTHOR("Reinhard Nissl <rnissl@gmx.de>"); -MODULE_DESCRIPTION("FSC Hermes driver"); -MODULE_LICENSE("GPL"); - -module_init(sensors_fscher_init); -module_exit(sensors_fscher_exit); diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c deleted file mode 100644 index 8a7bcf500b4..00000000000 --- a/drivers/hwmon/fscpos.c +++ /dev/null @@ -1,654 +0,0 @@ -/* - fscpos.c - Kernel module for hardware monitoring with FSC Poseidon chips - Copyright (C) 2004, 2005 Stefan Ott <stefan@desire.ch> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -/* - fujitsu siemens poseidon chip, - module based on the old fscpos module by Hermann Jung <hej@odn.de> and - the fscher module by Reinhard Nissl <rnissl@gmx.de> - - original module based on lm80.c - Copyright (C) 1998, 1999 Frodo Looijaard <frodol@dds.nl> - and Philip Edelbrock <phil@netroedge.com> - - Thanks to Jean Delvare for reviewing my code and suggesting a lot of - improvements. -*/ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/jiffies.h> -#include <linux/i2c.h> -#include <linux/init.h> -#include <linux/hwmon.h> -#include <linux/err.h> -#include <linux/mutex.h> -#include <linux/sysfs.h> - -/* - * Addresses to scan - */ -static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END }; - -/* - * Insmod parameters - */ -I2C_CLIENT_INSMOD_1(fscpos); - -/* - * The FSCPOS registers - */ - -/* chip identification */ -#define FSCPOS_REG_IDENT_0 0x00 -#define FSCPOS_REG_IDENT_1 0x01 -#define FSCPOS_REG_IDENT_2 0x02 -#define FSCPOS_REG_REVISION 0x03 - -/* global control and status */ -#define FSCPOS_REG_EVENT_STATE 0x04 -#define FSCPOS_REG_CONTROL 0x05 - -/* watchdog */ -#define FSCPOS_REG_WDOG_PRESET 0x28 -#define FSCPOS_REG_WDOG_STATE 0x23 -#define FSCPOS_REG_WDOG_CONTROL 0x21 - -/* voltages */ -#define FSCPOS_REG_VOLT_12 0x45 -#define FSCPOS_REG_VOLT_5 0x42 -#define FSCPOS_REG_VOLT_BATT 0x48 - -/* fans - the chip does not support minimum speed for fan2 */ -static u8 FSCPOS_REG_PWM[] = { 0x55, 0x65 }; -static u8 FSCPOS_REG_FAN_ACT[] = { 0x0e, 0x6b, 0xab }; -static u8 FSCPOS_REG_FAN_STATE[] = { 0x0d, 0x62, 0xa2 }; -static u8 FSCPOS_REG_FAN_RIPPLE[] = { 0x0f, 0x6f, 0xaf }; - -/* temperatures */ -static u8 FSCPOS_REG_TEMP_ACT[] = { 0x64, 0x32, 0x35 }; -static u8 FSCPOS_REG_TEMP_STATE[] = { 0x71, 0x81, 0x91 }; - -/* - * Functions declaration - */ -static int fscpos_probe(struct i2c_client *client, - const struct i2c_device_id *id); -static int fscpos_detect(struct i2c_client *client, int kind, - struct i2c_board_info *info); -static int fscpos_remove(struct i2c_client *client); - -static int fscpos_read_value(struct i2c_client *client, u8 reg); -static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value); -static struct fscpos_data *fscpos_update_device(struct device *dev); -static void fscpos_init_client(struct i2c_client *client); - -static void reset_fan_alarm(struct i2c_client *client, int nr); - -/* - * Driver data (common to all clients) - */ -static const struct i2c_device_id fscpos_id[] = { - { "fscpos", fscpos }, - { } -}; - -static struct i2c_driver fscpos_driver = { - .class = I2C_CLASS_HWMON, - .driver = { - .name = "fscpos", - }, - .probe = fscpos_probe, - .remove = fscpos_remove, - .id_table = fscpos_id, - .detect = fscpos_detect, - .address_data = &addr_data, -}; - -/* - * Client data (each client gets its own) - */ -struct fscpos_data { - struct device *hwmon_dev; - struct mutex update_lock; - char valid; /* 0 until following fields are valid */ - unsigned long last_updated; /* In jiffies */ - - /* register values */ - u8 revision; /* revision of chip */ - u8 global_event; /* global event status */ - u8 global_control; /* global control register */ - u8 wdog_control; /* watchdog control */ - u8 wdog_state; /* watchdog status */ - u8 wdog_preset; /* watchdog preset */ - u8 volt[3]; /* 12, 5, battery current */ - u8 temp_act[3]; /* temperature */ - u8 temp_status[3]; /* status of sensor */ - u8 fan_act[3]; /* fans revolutions per second */ - u8 fan_status[3]; /* fan status */ - u8 pwm[2]; /* fan min value for rps */ - u8 fan_ripple[3]; /* divider for rps */ -}; - -/* Temperature */ -#define TEMP_FROM_REG(val) (((val) - 128) * 1000) - -static ssize_t show_temp_input(struct fscpos_data *data, char *buf, int nr) -{ - return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp_act[nr - 1])); -} - -static ssize_t show_temp_status(struct fscpos_data *data, char *buf, int nr) -{ - /* bits 2..7 reserved => mask with 0x03 */ - return sprintf(buf, "%u\n", data->temp_status[nr - 1] & 0x03); -} - -static ssize_t show_temp_reset(struct fscpos_data *data, char *buf, int nr) -{ - return sprintf(buf, "1\n"); -} - -static ssize_t set_temp_reset(struct i2c_client *client, struct fscpos_data - *data, const char *buf, size_t count, int nr, int reg) -{ - unsigned long v = simple_strtoul(buf, NULL, 10); - if (v != 1) { - dev_err(&client->dev, "temp_reset value %ld not supported. " - "Use 1 to reset the alarm!\n", v); - return -EINVAL; - } - - dev_info(&client->dev, "You used the temp_reset feature which has not " - "been proplerly tested. Please report your " - "experience to the module author.\n"); - - /* Supported value: 2 (clears the status) */ - fscpos_write_value(client, FSCPOS_REG_TEMP_STATE[nr - 1], 2); - return count; -} - -/* Fans */ -#define RPM_FROM_REG(val) ((val) * 60) - -static ssize_t show_fan_status(struct fscpos_data *data, char *buf, int nr) -{ - /* bits 0..1, 3..7 reserved => mask with 0x04 */ - return sprintf(buf, "%u\n", data->fan_status[nr - 1] & 0x04); -} - -static ssize_t show_fan_input(struct fscpos_data *data, char *buf, int nr) -{ - return sprintf(buf, "%u\n", RPM_FROM_REG(data->fan_act[nr - 1])); -} - -static ssize_t show_fan_ripple(struct fscpos_data *data, char *buf, int nr) -{ - /* bits 2..7 reserved => mask with 0x03 */ - return sprintf(buf, "%u\n", data->fan_ripple[nr - 1] & 0x03); -} - -static ssize_t set_fan_ripple(struct i2c_client *client, struct fscpos_data - *data, const char *buf, size_t count, int nr, int reg) -{ - /* supported values: 2, 4, 8 */ - unsigned long v = simple_strtoul(buf, NULL, 10); - - switch (v) { - case 2: v = 1; break; - case 4: v = 2; break; - case 8: v = 3; break; - default: - dev_err(&client->dev, "fan_ripple value %ld not supported. " - "Must be one of 2, 4 or 8!\n", v); - return -EINVAL; - } - - mutex_lock(&data->update_lock); - /* bits 2..7 reserved => mask with 0x03 */ - data->fan_ripple[nr - 1] &= ~0x03; - data->fan_ripple[nr - 1] |= v; - - fscpos_write_value(client, reg, data->fan_ripple[nr - 1]); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_pwm(struct fscpos_data *data, char *buf, int nr) -{ - return sprintf(buf, "%u\n", data->pwm[nr - 1]); -} - -static ssize_t set_pwm(struct i2c_client *client, struct fscpos_data *data, - const char *buf, size_t count, int nr, int reg) -{ - unsigned long v = simple_strtoul(buf, NULL, 10); - - /* Range: 0..255 */ - if (v < 0) v = 0; - if (v > 255) v = 255; - - mutex_lock(&data->update_lock); - data->pwm[nr - 1] = v; - fscpos_write_value(client, reg, data->pwm[nr - 1]); - mutex_unlock(&data->update_lock); - return count; -} - -static void reset_fan_alarm(struct i2c_client *client, int nr) -{ - fscpos_write_value(client, FSCPOS_REG_FAN_STATE[nr], 4); -} - -/* Volts */ -#define VOLT_FROM_REG(val, mult) ((val) * (mult) / 255) - -static ssize_t show_volt_12(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct fscpos_data *data = fscpos_update_device(dev); - return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[0], 14200)); -} - -static ssize_t show_volt_5(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct fscpos_data *data = fscpos_update_device(dev); - return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[1], 6600)); -} - -static ssize_t show_volt_batt(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct fscpos_data *data = fscpos_update_device(dev); - return sprintf(buf, "%u\n", VOLT_FROM_REG(data->volt[2], 3300)); -} - -/* Watchdog */ -static ssize_t show_wdog_control(struct fscpos_data *data, char *buf) -{ - /* bits 0..3 reserved, bit 6 write only => mask with 0xb0 */ - return sprintf(buf, "%u\n", data->wdog_control & 0xb0); -} - -static ssize_t set_wdog_control(struct i2c_client *client, struct fscpos_data - *data, const char *buf, size_t count, int reg) -{ - /* bits 0..3 reserved => mask with 0xf0 */ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0xf0; - - mutex_lock(&data->update_lock); - data->wdog_control &= ~0xf0; - data->wdog_control |= v; - fscpos_write_value(client, reg, data->wdog_control); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_wdog_state(struct fscpos_data *data, char *buf) -{ - /* bits 0, 2..7 reserved => mask with 0x02 */ - return sprintf(buf, "%u\n", data->wdog_state & 0x02); -} - -static ssize_t set_wdog_state(struct i2c_client *client, struct fscpos_data - *data, const char *buf, size_t count, int reg) -{ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0x02; - - /* Valid values: 2 (clear) */ - if (v != 2) { - dev_err(&client->dev, "wdog_state value %ld not supported. " - "Must be 2 to clear the state!\n", v); - return -EINVAL; - } - - mutex_lock(&data->update_lock); - data->wdog_state &= ~v; - fscpos_write_value(client, reg, v); - mutex_unlock(&data->update_lock); - return count; -} - -static ssize_t show_wdog_preset(struct fscpos_data *data, char *buf) -{ - return sprintf(buf, "%u\n", data->wdog_preset); -} - -static ssize_t set_wdog_preset(struct i2c_client *client, struct fscpos_data - *data, const char *buf, size_t count, int reg) -{ - unsigned long v = simple_strtoul(buf, NULL, 10) & 0xff; - - mutex_lock(&data->update_lock); - data->wdog_preset = v; - fscpos_write_value(client, reg, data->wdog_preset); - mutex_unlock(&data->update_lock); - return count; -} - -/* Event */ -static ssize_t show_event(struct device *dev, struct device_attribute *attr, char *buf) -{ - /* bits 5..7 reserved => mask with 0x1f */ - struct fscpos_data *data = fscpos_update_device(dev); - return sprintf(buf, "%u\n", data->global_event & 0x9b); -} - -/* - * Sysfs stuff - */ -#define create_getter(kind, sub) \ - static ssize_t sysfs_show_##kind##sub(struct device *dev, struct device_attribute *attr, char *buf) \ - { \ - struct fscpos_data *data = fscpos_update_device(dev); \ - return show_##kind##sub(data, buf); \ - } - -#define create_getter_n(kind, offset, sub) \ - static ssize_t sysfs_show_##kind##offset##sub(struct device *dev, struct device_attribute *attr, char\ - *buf) \ - { \ - struct fscpos_data *data = fscpos_update_device(dev); \ - return show_##kind##sub(data, buf, offset); \ - } - -#define create_setter(kind, sub, reg) \ - static ssize_t sysfs_set_##kind##sub (struct device *dev, struct device_attribute *attr, const char \ - *buf, size_t count) \ - { \ - struct i2c_client *client = to_i2c_client(dev); \ - struct fscpos_data *data = i2c_get_clientdata(client); \ - return set_##kind##sub(client, data, buf, count, reg); \ - } - -#define create_setter_n(kind, offset, sub, reg) \ - static ssize_t sysfs_set_##kind##offset##sub (struct device *dev, struct device_attribute *attr, \ - const char *buf, size_t count) \ - { \ - struct i2c_client *client = to_i2c_client(dev); \ - struct fscpos_data *data = i2c_get_clientdata(client); \ - return set_##kind##sub(client, data, buf, count, offset, reg);\ - } - -#define create_sysfs_device_ro(kind, sub, offset) \ - static DEVICE_ATTR(kind##offset##sub, S_IRUGO, \ - sysfs_show_##kind##offset##sub, NULL); - -#define create_sysfs_device_rw(kind, sub, offset) \ - static DEVICE_ATTR(kind##offset##sub, S_IRUGO | S_IWUSR, \ - sysfs_show_##kind##offset##sub, sysfs_set_##kind##offset##sub); - -#define sysfs_ro_n(kind, sub, offset) \ - create_getter_n(kind, offset, sub); \ - create_sysfs_device_ro(kind, sub, offset); - -#define sysfs_rw_n(kind, sub, offset, reg) \ - create_getter_n(kind, offset, sub); \ - create_setter_n(kind, offset, sub, reg); \ - create_sysfs_device_rw(kind, sub, offset); - -#define sysfs_rw(kind, sub, reg) \ - create_getter(kind, sub); \ - create_setter(kind, sub, reg); \ - create_sysfs_device_rw(kind, sub,); - -#define sysfs_fan_with_min(offset, reg_status, reg_ripple, reg_min) \ - sysfs_fan(offset, reg_status, reg_ripple); \ - sysfs_rw_n(pwm,, offset, reg_min); - -#define sysfs_fan(offset, reg_status, reg_ripple) \ - sysfs_ro_n(fan, _input, offset); \ - sysfs_ro_n(fan, _status, offset); \ - sysfs_rw_n(fan, _ripple, offset, reg_ripple); - -#define sysfs_temp(offset, reg_status) \ - sysfs_ro_n(temp, _input, offset); \ - sysfs_ro_n(temp, _status, offset); \ - sysfs_rw_n(temp, _reset, offset, reg_status); - -#define sysfs_watchdog(reg_wdog_preset, reg_wdog_state, reg_wdog_control) \ - sysfs_rw(wdog, _control, reg_wdog_control); \ - sysfs_rw(wdog, _preset, reg_wdog_preset); \ - sysfs_rw(wdog, _state, reg_wdog_state); - -sysfs_fan_with_min(1, FSCPOS_REG_FAN_STATE[0], FSCPOS_REG_FAN_RIPPLE[0], - FSCPOS_REG_PWM[0]); -sysfs_fan_with_min(2, FSCPOS_REG_FAN_STATE[1], FSCPOS_REG_FAN_RIPPLE[1], - FSCPOS_REG_PWM[1]); -sysfs_fan(3, FSCPOS_REG_FAN_STATE[2], FSCPOS_REG_FAN_RIPPLE[2]); - -sysfs_temp(1, FSCPOS_REG_TEMP_STATE[0]); -sysfs_temp(2, FSCPOS_REG_TEMP_STATE[1]); -sysfs_temp(3, FSCPOS_REG_TEMP_STATE[2]); - -sysfs_watchdog(FSCPOS_REG_WDOG_PRESET, FSCPOS_REG_WDOG_STATE, - FSCPOS_REG_WDOG_CONTROL); - -static DEVICE_ATTR(event, S_IRUGO, show_event, NULL); -static DEVICE_ATTR(in0_input, S_IRUGO, show_volt_12, NULL); -static DEVICE_ATTR(in1_input, S_IRUGO, show_volt_5, NULL); -static DEVICE_ATTR(in2_input, S_IRUGO, show_volt_batt, NULL); - -static struct attribute *fscpos_attributes[] = { - &dev_attr_event.attr, - &dev_attr_in0_input.attr, - &dev_attr_in1_input.attr, - &dev_attr_in2_input.attr, - - &dev_attr_wdog_control.attr, - &dev_attr_wdog_preset.attr, - &dev_attr_wdog_state.attr, - - &dev_attr_temp1_input.attr, - &dev_attr_temp1_status.attr, - &dev_attr_temp1_reset.attr, - &dev_attr_temp2_input.attr, - &dev_attr_temp2_status.attr, - &dev_attr_temp2_reset.attr, - &dev_attr_temp3_input.attr, - &dev_attr_temp3_status.attr, - &dev_attr_temp3_reset.attr, - - &dev_attr_fan1_input.attr, - &dev_attr_fan1_status.attr, - &dev_attr_fan1_ripple.attr, - &dev_attr_pwm1.attr, - &dev_attr_fan2_input.attr, - &dev_attr_fan2_status.attr, - &dev_attr_fan2_ripple.attr, - &dev_attr_pwm2.attr, - &dev_attr_fan3_input.attr, - &dev_attr_fan3_status.attr, - &dev_attr_fan3_ripple.attr, - NULL -}; - -static const struct attribute_group fscpos_group = { - .attrs = fscpos_attributes, -}; - -/* Return 0 if detection is successful, -ENODEV otherwise */ -static int fscpos_detect(struct i2c_client *new_client, int kind, - struct i2c_board_info *info) -{ - struct i2c_adapter *adapter = new_client->adapter; - - if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) - return -ENODEV; - - /* Do the remaining detection unless force or force_fscpos parameter */ - if (kind < 0) { - if ((fscpos_read_value(new_client, FSCPOS_REG_IDENT_0) - != 0x50) /* 'P' */ - || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_1) - != 0x45) /* 'E' */ - || (fscpos_read_value(new_client, FSCPOS_REG_IDENT_2) - != 0x47))/* 'G' */ - return -ENODEV; - } - - strlcpy(info->type, "fscpos", I2C_NAME_SIZE); - - return 0; -} - -static int fscpos_probe(struct i2c_client *new_client, - const struct i2c_device_id *id) -{ - struct fscpos_data *data; - int err; - - data = kzalloc(sizeof(struct fscpos_data), GFP_KERNEL); - if (!data) { - err = -ENOMEM; - goto exit; - } - - i2c_set_clientdata(new_client, data); - data->valid = 0; - mutex_init(&data->update_lock); - - /* Inizialize the fscpos chip */ - fscpos_init_client(new_client); - - /* Announce that the chip was found */ - dev_info(&new_client->dev, "Found fscpos chip, rev %u\n", data->revision); - - /* Register sysfs hooks */ - if ((err = sysfs_create_group(&new_client->dev.kobj, &fscpos_group))) - goto exit_free; - - data->hwmon_dev = hwmon_device_register(&new_client->dev); - if (IS_ERR(data->hwmon_dev)) { - err = PTR_ERR(data->hwmon_dev); - goto exit_remove_files; - } - - return 0; - -exit_remove_files: - sysfs_remove_group(&new_client->dev.kobj, &fscpos_group); -exit_free: - kfree(data); -exit: - return err; -} - -static int fscpos_remove(struct i2c_client *client) -{ - struct fscpos_data *data = i2c_get_clientdata(client); - - hwmon_device_unregister(data->hwmon_dev); - sysfs_remove_group(&client->dev.kobj, &fscpos_group); - - kfree(data); - return 0; -} - -static int fscpos_read_value(struct i2c_client *client, u8 reg) -{ - dev_dbg(&client->dev, "Read reg 0x%02x\n", reg); - return i2c_smbus_read_byte_data(client, reg); -} - -static int fscpos_write_value(struct i2c_client *client, u8 reg, u8 value) -{ - dev_dbg(&client->dev, "Write reg 0x%02x, val 0x%02x\n", reg, value); - return i2c_smbus_write_byte_data(client, reg, value); -} - -/* Called when we have found a new FSCPOS chip */ -static void fscpos_init_client(struct i2c_client *client) -{ - struct fscpos_data *data = i2c_get_clientdata(client); - - /* read revision from chip */ - data->revision = fscpos_read_value(client, FSCPOS_REG_REVISION); -} - -static struct fscpos_data *fscpos_update_device(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - struct fscpos_data *data = i2c_get_clientdata(client); - - mutex_lock(&data->update_lock); - - if (time_after(jiffies, data->last_updated + 2 * HZ) || !data->valid) { - int i; - - dev_dbg(&client->dev, "Starting fscpos update\n"); - - for (i = 0; i < 3; i++) { - data->temp_act[i] = fscpos_read_value(client, - FSCPOS_REG_TEMP_ACT[i]); - data->temp_status[i] = fscpos_read_value(client, - FSCPOS_REG_TEMP_STATE[i]); - data->fan_act[i] = fscpos_read_value(client, - FSCPOS_REG_FAN_ACT[i]); - data->fan_status[i] = fscpos_read_value(client, - FSCPOS_REG_FAN_STATE[i]); - data->fan_ripple[i] = fscpos_read_value(client, - FSCPOS_REG_FAN_RIPPLE[i]); - if (i < 2) { - /* fan2_min is not supported by the chip */ - data->pwm[i] = fscpos_read_value(client, - FSCPOS_REG_PWM[i]); - } - /* reset fan status if speed is back to > 0 */ - if (data->fan_status[i] != 0 && data->fan_act[i] > 0) { - reset_fan_alarm(client, i); - } - } - - data->volt[0] = fscpos_read_value(client, FSCPOS_REG_VOLT_12); - data->volt[1] = fscpos_read_value(client, FSCPOS_REG_VOLT_5); - data->volt[2] = fscpos_read_value(client, FSCPOS_REG_VOLT_BATT); - - data->wdog_preset = fscpos_read_value(client, - FSCPOS_REG_WDOG_PRESET); - data->wdog_state = fscpos_read_value(client, - FSCPOS_REG_WDOG_STATE); - data->wdog_control = fscpos_read_value(client, - FSCPOS_REG_WDOG_CONTROL); - - data->global_event = fscpos_read_value(client, - FSCPOS_REG_EVENT_STATE); - - data->last_updated = jiffies; - data->valid = 1; - } - mutex_unlock(&data->update_lock); - return data; -} - -static int __init sm_fscpos_init(void) -{ - return i2c_add_driver(&fscpos_driver); -} - -static void __exit sm_fscpos_exit(void) -{ - i2c_del_driver(&fscpos_driver); -} - -MODULE_AUTHOR("Stefan Ott <stefan@desire.ch> based on work from Hermann Jung " - "<hej@odn.de>, Frodo Looijaard <frodol@dds.nl>" - " and Philip Edelbrock <phil@netroedge.com>"); -MODULE_DESCRIPTION("fujitsu siemens poseidon chip driver"); -MODULE_LICENSE("GPL"); - -module_init(sm_fscpos_init); -module_exit(sm_fscpos_exit); diff --git a/drivers/hwmon/ltc4215.c b/drivers/hwmon/ltc4215.c index 9386e2a3921..6c9a04136e0 100644 --- a/drivers/hwmon/ltc4215.c +++ b/drivers/hwmon/ltc4215.c @@ -259,7 +259,7 @@ static int ltc4215_probe(struct i2c_client *client, mutex_init(&data->update_lock); /* Initialize the LTC4215 chip */ - /* TODO */ + i2c_smbus_write_byte_data(client, LTC4215_FAULT, 0x00); /* Register sysfs hooks */ ret = sysfs_create_group(&client->dev.kobj, <c4215_group); diff --git a/drivers/hwmon/ltc4245.c b/drivers/hwmon/ltc4245.c index 034b2c51584..e3896433361 100644 --- a/drivers/hwmon/ltc4245.c +++ b/drivers/hwmon/ltc4245.c @@ -382,7 +382,8 @@ static int ltc4245_probe(struct i2c_client *client, mutex_init(&data->update_lock); /* Initialize the LTC4245 chip */ - /* TODO */ + i2c_smbus_write_byte_data(client, LTC4245_FAULT1, 0x00); + i2c_smbus_write_byte_data(client, LTC4245_FAULT2, 0x00); /* Register sysfs hooks */ ret = sysfs_create_group(&client->dev.kobj, <c4245_group); diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index 949c97ff57e..1f20a042a4f 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c @@ -29,8 +29,8 @@ #include <asm/idle.h> -#include "../dma/ioatdma_hw.h" -#include "../dma/ioatdma_registers.h" +#include "../dma/ioat/hw.h" +#include "../dma/ioat/registers.h" #define I7300_IDLE_DRIVER_VERSION "1.55" #define I7300_PRINT "i7300_idle:" @@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void) udelay(10); sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; - if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) + if (sts != IOAT_CHANSTS_ACTIVE) break; } @@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl, udelay(1000); chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; - if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) { + if (chan_sts != IOAT_CHANSTS_DONE) { /* Not complete, reset the channel */ writeb(IOAT_CHANCMD_RESET, ioat_chanbase + IOAT1_CHANCMD_OFFSET); @@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void) ioat_chanbase + IOAT1_CHANCMD_OFFSET); chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; - if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { + if (chan_sts != IOAT_CHANSTS_ACTIVE) { writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); break; } @@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void) } chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & - IOAT_CHANSTS_DMA_TRANSFER_STATUS; + IOAT_CHANSTS_STATUS; /* * We tried to reset multiple times. If IO A/T channel is still active * flag an error and return without cleanup. Memory leak is better * than random corruption in that extreme error situation. */ - if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { + if (chan_sts == IOAT_CHANSTS_ACTIVE) { printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." " Not freeing resources\n"); return; diff --git a/drivers/input/input.c b/drivers/input/input.c index 556539d617a..e828aab7dac 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -11,6 +11,7 @@ */ #include <linux/init.h> +#include <linux/types.h> #include <linux/input.h> #include <linux/module.h> #include <linux/random.h> @@ -514,7 +515,7 @@ static void input_disconnect_device(struct input_dev *dev) * that there are no threads in the middle of input_open_device() */ mutex_lock(&dev->mutex); - dev->going_away = 1; + dev->going_away = true; mutex_unlock(&dev->mutex); spin_lock_irq(&dev->event_lock); @@ -1259,10 +1260,71 @@ static int input_dev_uevent(struct device *device, struct kobj_uevent_env *env) return 0; } +#define INPUT_DO_TOGGLE(dev, type, bits, on) \ + do { \ + int i; \ + if (!test_bit(EV_##type, dev->evbit)) \ + break; \ + for (i = 0; i < type##_MAX; i++) { \ + if (!test_bit(i, dev->bits##bit) || \ + !test_bit(i, dev->bits)) \ + continue; \ + dev->event(dev, EV_##type, i, on); \ + } \ + } while (0) + +static void input_dev_reset(struct input_dev *dev, bool activate) +{ + if (!dev->event) + return; + + INPUT_DO_TOGGLE(dev, LED, led, activate); + INPUT_DO_TOGGLE(dev, SND, snd, activate); + + if (activate && test_bit(EV_REP, dev->evbit)) { + dev->event(dev, EV_REP, REP_PERIOD, dev->rep[REP_PERIOD]); + dev->event(dev, EV_REP, REP_DELAY, dev->rep[REP_DELAY]); + } +} + +#ifdef CONFIG_PM +static int input_dev_suspend(struct device *dev) +{ + struct input_dev *input_dev = to_input_dev(dev); + + mutex_lock(&input_dev->mutex); + input_dev_reset(input_dev, false); + mutex_unlock(&input_dev->mutex); + + return 0; +} + +static int input_dev_resume(struct device *dev) +{ + struct input_dev *input_dev = to_input_dev(dev); + + mutex_lock(&input_dev->mutex); + input_dev_reset(input_dev, true); + mutex_unlock(&input_dev->mutex); + + return 0; +} + +static const struct dev_pm_ops input_dev_pm_ops = { + .suspend = input_dev_suspend, + .resume = input_dev_resume, + .poweroff = input_dev_suspend, + .restore = input_dev_resume, +}; +#endif /* CONFIG_PM */ + static struct device_type input_dev_type = { .groups = input_dev_attr_groups, .release = input_dev_release, .uevent = input_dev_uevent, +#ifdef CONFIG_PM + .pm = &input_dev_pm_ops, +#endif }; static char *input_devnode(struct device *dev, mode_t *mode) diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 3525c19be42..ee98b1bc5d8 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -24,6 +24,16 @@ config KEYBOARD_AAED2000 To compile this driver as a module, choose M here: the module will be called aaed2000_kbd. +config KEYBOARD_ADP5588 + tristate "ADP5588 I2C QWERTY Keypad and IO Expander" + depends on I2C + help + Say Y here if you want to use a ADP5588 attached to your + system I2C bus. + + To compile this driver as a module, choose M here: the + module will be called adp5588-keys. + config KEYBOARD_AMIGA tristate "Amiga keyboard" depends on AMIGA @@ -104,6 +114,16 @@ config KEYBOARD_ATKBD_RDI_KEYCODES right-hand column will be interpreted as the key shown in the left-hand column. +config QT2160 + tristate "Atmel AT42QT2160 Touch Sensor Chip" + depends on I2C && EXPERIMENTAL + help + If you say yes here you get support for Atmel AT42QT2160 Touch + Sensor chip as a keyboard input. + + This driver can also be built as a module. If so, the module + will be called qt2160. + config KEYBOARD_BFIN tristate "Blackfin BF54x keypad support" depends on (BF54x && !BF544) @@ -251,6 +271,17 @@ config KEYBOARD_MAPLE To compile this driver as a module, choose M here: the module will be called maple_keyb. +config KEYBOARD_MAX7359 + tristate "Maxim MAX7359 Key Switch Controller" + depends on I2C + help + If you say yes here you get support for the Maxim MAX7359 Key + Switch Controller chip. This providers microprocessors with + management of up to 64 key switches + + To compile this driver as a module, choose M here: the + module will be called max7359_keypad. + config KEYBOARD_NEWTON tristate "Newton keyboard" select SERIO @@ -260,6 +291,15 @@ config KEYBOARD_NEWTON To compile this driver as a module, choose M here: the module will be called newtonkbd. +config KEYBOARD_OPENCORES + tristate "OpenCores Keyboard Controller" + help + Say Y here if you want to use the OpenCores Keyboard Controller + http://www.opencores.org/project,keyboardcontroller + + To compile this driver as a module, choose M here; the + module will be called opencores-kbd. + config KEYBOARD_PXA27x tristate "PXA27x/PXA3xx keypad support" depends on PXA27x || PXA3xx diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 8a7a22b3026..babad5e58b7 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -5,6 +5,7 @@ # Each configuration option enables a list of files. obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o +obj-$(CONFIG_KEYBOARD_ADP5588) += adp5588-keys.o obj-$(CONFIG_KEYBOARD_AMIGA) += amikbd.o obj-$(CONFIG_KEYBOARD_ATARI) += atakbd.o obj-$(CONFIG_KEYBOARD_ATKBD) += atkbd.o @@ -21,10 +22,13 @@ obj-$(CONFIG_KEYBOARD_LM8323) += lm8323.o obj-$(CONFIG_KEYBOARD_LOCOMO) += locomokbd.o obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o obj-$(CONFIG_KEYBOARD_MATRIX) += matrix_keypad.o +obj-$(CONFIG_KEYBOARD_MAX7359) += max7359_keypad.o obj-$(CONFIG_KEYBOARD_NEWTON) += newtonkbd.o obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o +obj-$(CONFIG_KEYBOARD_OPENCORES) += opencores-kbd.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o obj-$(CONFIG_KEYBOARD_PXA930_ROTARY) += pxa930_rotary.o +obj-$(CONFIG_KEYBOARD_QT2160) += qt2160.o obj-$(CONFIG_KEYBOARD_SH_KEYSC) += sh_keysc.o obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o obj-$(CONFIG_KEYBOARD_STOWAWAY) += stowaway.o diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c new file mode 100644 index 00000000000..d48c808d592 --- /dev/null +++ b/drivers/input/keyboard/adp5588-keys.c @@ -0,0 +1,361 @@ +/* + * File: drivers/input/keyboard/adp5588_keys.c + * Description: keypad driver for ADP5588 I2C QWERTY Keypad and IO Expander + * Bugs: Enter bugs at http://blackfin.uclinux.org/ + * + * Copyright (C) 2008-2009 Analog Devices Inc. + * Licensed under the GPL-2 or later. + */ + +#include <linux/module.h> +#include <linux/version.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/workqueue.h> +#include <linux/errno.h> +#include <linux/pm.h> +#include <linux/platform_device.h> +#include <linux/input.h> +#include <linux/i2c.h> + +#include <linux/i2c/adp5588.h> + + /* Configuration Register1 */ +#define AUTO_INC (1 << 7) +#define GPIEM_CFG (1 << 6) +#define OVR_FLOW_M (1 << 5) +#define INT_CFG (1 << 4) +#define OVR_FLOW_IEN (1 << 3) +#define K_LCK_IM (1 << 2) +#define GPI_IEN (1 << 1) +#define KE_IEN (1 << 0) + +/* Interrupt Status Register */ +#define CMP2_INT (1 << 5) +#define CMP1_INT (1 << 4) +#define OVR_FLOW_INT (1 << 3) +#define K_LCK_INT (1 << 2) +#define GPI_INT (1 << 1) +#define KE_INT (1 << 0) + +/* Key Lock and Event Counter Register */ +#define K_LCK_EN (1 << 6) +#define LCK21 0x30 +#define KEC 0xF + +/* Key Event Register xy */ +#define KEY_EV_PRESSED (1 << 7) +#define KEY_EV_MASK (0x7F) + +#define KP_SEL(x) (0xFFFF >> (16 - x)) /* 2^x-1 */ + +#define KEYP_MAX_EVENT 10 + +/* + * Early pre 4.0 Silicon required to delay readout by at least 25ms, + * since the Event Counter Register updated 25ms after the interrupt + * asserted. + */ +#define WA_DELAYED_READOUT_REVID(rev) ((rev) < 4) + +struct adp5588_kpad { + struct i2c_client *client; + struct input_dev *input; + struct delayed_work work; + unsigned long delay; + unsigned short keycode[ADP5588_KEYMAPSIZE]; +}; + +static int adp5588_read(struct i2c_client *client, u8 reg) +{ + int ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "Read Error\n"); + + return ret; +} + +static int adp5588_write(struct i2c_client *client, u8 reg, u8 val) +{ + return i2c_smbus_write_byte_data(client, reg, val); +} + +static void adp5588_work(struct work_struct *work) +{ + struct adp5588_kpad *kpad = container_of(work, + struct adp5588_kpad, work.work); + struct i2c_client *client = kpad->client; + int i, key, status, ev_cnt; + + status = adp5588_read(client, INT_STAT); + + if (status & OVR_FLOW_INT) /* Unlikely and should never happen */ + dev_err(&client->dev, "Event Overflow Error\n"); + + if (status & KE_INT) { + ev_cnt = adp5588_read(client, KEY_LCK_EC_STAT) & KEC; + if (ev_cnt) { + for (i = 0; i < ev_cnt; i++) { + key = adp5588_read(client, Key_EVENTA + i); + input_report_key(kpad->input, + kpad->keycode[(key & KEY_EV_MASK) - 1], + key & KEY_EV_PRESSED); + } + input_sync(kpad->input); + } + } + adp5588_write(client, INT_STAT, status); /* Status is W1C */ +} + +static irqreturn_t adp5588_irq(int irq, void *handle) +{ + struct adp5588_kpad *kpad = handle; + + /* + * use keventd context to read the event fifo registers + * Schedule readout at least 25ms after notification for + * REVID < 4 + */ + + schedule_delayed_work(&kpad->work, kpad->delay); + + return IRQ_HANDLED; +} + +static int __devinit adp5588_setup(struct i2c_client *client) +{ + struct adp5588_kpad_platform_data *pdata = client->dev.platform_data; + int i, ret; + + ret = adp5588_write(client, KP_GPIO1, KP_SEL(pdata->rows)); + ret |= adp5588_write(client, KP_GPIO2, KP_SEL(pdata->cols) & 0xFF); + ret |= adp5588_write(client, KP_GPIO3, KP_SEL(pdata->cols) >> 8); + + if (pdata->en_keylock) { + ret |= adp5588_write(client, UNLOCK1, pdata->unlock_key1); + ret |= adp5588_write(client, UNLOCK2, pdata->unlock_key2); + ret |= adp5588_write(client, KEY_LCK_EC_STAT, K_LCK_EN); + } + + for (i = 0; i < KEYP_MAX_EVENT; i++) + ret |= adp5588_read(client, Key_EVENTA); + + ret |= adp5588_write(client, INT_STAT, CMP2_INT | CMP1_INT | + OVR_FLOW_INT | K_LCK_INT | + GPI_INT | KE_INT); /* Status is W1C */ + + ret |= adp5588_write(client, CFG, INT_CFG | OVR_FLOW_IEN | KE_IEN); + + if (ret < 0) { + dev_err(&client->dev, "Write Error\n"); + return ret; + } + + return 0; +} + +static int __devinit adp5588_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adp5588_kpad *kpad; + struct adp5588_kpad_platform_data *pdata = client->dev.platform_data; + struct input_dev *input; + unsigned int revid; + int ret, i; + int error; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "SMBUS Byte Data not Supported\n"); + return -EIO; + } + + if (!pdata) { + dev_err(&client->dev, "no platform data?\n"); + return -EINVAL; + } + + if (!pdata->rows || !pdata->cols || !pdata->keymap) { + dev_err(&client->dev, "no rows, cols or keymap from pdata\n"); + return -EINVAL; + } + + if (pdata->keymapsize != ADP5588_KEYMAPSIZE) { + dev_err(&client->dev, "invalid keymapsize\n"); + return -EINVAL; + } + + if (!client->irq) { + dev_err(&client->dev, "no IRQ?\n"); + return -EINVAL; + } + + kpad = kzalloc(sizeof(*kpad), GFP_KERNEL); + input = input_allocate_device(); + if (!kpad || !input) { + error = -ENOMEM; + goto err_free_mem; + } + + kpad->client = client; + kpad->input = input; + INIT_DELAYED_WORK(&kpad->work, adp5588_work); + + ret = adp5588_read(client, DEV_ID); + if (ret < 0) { + error = ret; + goto err_free_mem; + } + + revid = (u8) ret & ADP5588_DEVICE_ID_MASK; + if (WA_DELAYED_READOUT_REVID(revid)) + kpad->delay = msecs_to_jiffies(30); + + input->name = client->name; + input->phys = "adp5588-keys/input0"; + input->dev.parent = &client->dev; + + input_set_drvdata(input, kpad); + + input->id.bustype = BUS_I2C; + input->id.vendor = 0x0001; + input->id.product = 0x0001; + input->id.version = revid; + + input->keycodesize = sizeof(kpad->keycode[0]); + input->keycodemax = pdata->keymapsize; + input->keycode = kpad->keycode; + + memcpy(kpad->keycode, pdata->keymap, + pdata->keymapsize * input->keycodesize); + + /* setup input device */ + __set_bit(EV_KEY, input->evbit); + + if (pdata->repeat) + __set_bit(EV_REP, input->evbit); + + for (i = 0; i < input->keycodemax; i++) + __set_bit(kpad->keycode[i] & KEY_MAX, input->keybit); + __clear_bit(KEY_RESERVED, input->keybit); + + error = input_register_device(input); + if (error) { + dev_err(&client->dev, "unable to register input device\n"); + goto err_free_mem; + } + + error = request_irq(client->irq, adp5588_irq, + IRQF_TRIGGER_FALLING | IRQF_DISABLED, + client->dev.driver->name, kpad); + if (error) { + dev_err(&client->dev, "irq %d busy?\n", client->irq); + goto err_unreg_dev; + } + + error = adp5588_setup(client); + if (error) + goto err_free_irq; + + device_init_wakeup(&client->dev, 1); + i2c_set_clientdata(client, kpad); + + dev_info(&client->dev, "Rev.%d keypad, irq %d\n", revid, client->irq); + return 0; + + err_free_irq: + free_irq(client->irq, kpad); + err_unreg_dev: + input_unregister_device(input); + input = NULL; + err_free_mem: + input_free_device(input); + kfree(kpad); + + return error; +} + +static int __devexit adp5588_remove(struct i2c_client *client) +{ + struct adp5588_kpad *kpad = i2c_get_clientdata(client); + + adp5588_write(client, CFG, 0); + free_irq(client->irq, kpad); + cancel_delayed_work_sync(&kpad->work); + input_unregister_device(kpad->input); + i2c_set_clientdata(client, NULL); + kfree(kpad); + + return 0; +} + +#ifdef CONFIG_PM +static int adp5588_suspend(struct device *dev) +{ + struct adp5588_kpad *kpad = dev_get_drvdata(dev); + struct i2c_client *client = kpad->client; + + disable_irq(client->irq); + cancel_delayed_work_sync(&kpad->work); + + if (device_may_wakeup(&client->dev)) + enable_irq_wake(client->irq); + + return 0; +} + +static int adp5588_resume(struct device *dev) +{ + struct adp5588_kpad *kpad = dev_get_drvdata(dev); + struct i2c_client *client = kpad->client; + + if (device_may_wakeup(&client->dev)) + disable_irq_wake(client->irq); + + enable_irq(client->irq); + + return 0; +} + +static struct dev_pm_ops adp5588_dev_pm_ops = { + .suspend = adp5588_suspend, + .resume = adp5588_resume, +}; +#endif + +static const struct i2c_device_id adp5588_id[] = { + { KBUILD_MODNAME, 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adp5588_id); + +static struct i2c_driver adp5588_driver = { + .driver = { + .name = KBUILD_MODNAME, +#ifdef CONFIG_PM + .pm = &adp5588_dev_pm_ops, +#endif + }, + .probe = adp5588_probe, + .remove = __devexit_p(adp5588_remove), + .id_table = adp5588_id, +}; + +static int __init adp5588_init(void) +{ + return i2c_add_driver(&adp5588_driver); +} +module_init(adp5588_init); + +static void __exit adp5588_exit(void) +{ + i2c_del_driver(&adp5588_driver); +} +module_exit(adp5588_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); +MODULE_DESCRIPTION("ADP5588 Keypad driver"); +MODULE_ALIAS("platform:adp5588-keys"); diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index adb09e2ba39..4709e15af60 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -773,23 +773,6 @@ static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra static int atkbd_activate(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; - unsigned char param[1]; - -/* - * Set the LEDs to a defined state. - */ - - param[0] = 0; - if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) - return -1; - -/* - * Set autorepeat to fastest possible. - */ - - param[0] = 0; - if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP)) - return -1; /* * Enable the keyboard to receive keystrokes. @@ -1158,14 +1141,6 @@ static int atkbd_reconnect(struct serio *serio) return -1; atkbd_activate(atkbd); - -/* - * Restore repeat rate and LEDs (that were reset by atkbd_activate) - * to pre-resume state - */ - if (!atkbd->softrepeat) - atkbd_set_repeat_rate(atkbd); - atkbd_set_leds(atkbd); } atkbd_enable(atkbd); diff --git a/drivers/input/keyboard/max7359_keypad.c b/drivers/input/keyboard/max7359_keypad.c new file mode 100644 index 00000000000..3b5b948eba3 --- /dev/null +++ b/drivers/input/keyboard/max7359_keypad.c @@ -0,0 +1,330 @@ +/* + * max7359_keypad.c - MAX7359 Key Switch Controller Driver + * + * Copyright (C) 2009 Samsung Electronics + * Kim Kyuwon <q1.kim@samsung.com> + * + * Based on pxa27x_keypad.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/5456 + */ + +#include <linux/module.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/input.h> +#include <linux/input/matrix_keypad.h> + +#define MAX7359_MAX_KEY_ROWS 8 +#define MAX7359_MAX_KEY_COLS 8 +#define MAX7359_MAX_KEY_NUM (MAX7359_MAX_KEY_ROWS * MAX7359_MAX_KEY_COLS) +#define MAX7359_ROW_SHIFT 3 + +/* + * MAX7359 registers + */ +#define MAX7359_REG_KEYFIFO 0x00 +#define MAX7359_REG_CONFIG 0x01 +#define MAX7359_REG_DEBOUNCE 0x02 +#define MAX7359_REG_INTERRUPT 0x03 +#define MAX7359_REG_PORTS 0x04 +#define MAX7359_REG_KEYREP 0x05 +#define MAX7359_REG_SLEEP 0x06 + +/* + * Configuration register bits + */ +#define MAX7359_CFG_SLEEP (1 << 7) +#define MAX7359_CFG_INTERRUPT (1 << 5) +#define MAX7359_CFG_KEY_RELEASE (1 << 3) +#define MAX7359_CFG_WAKEUP (1 << 1) +#define MAX7359_CFG_TIMEOUT (1 << 0) + +/* + * Autosleep register values (ms) + */ +#define MAX7359_AUTOSLEEP_8192 0x01 +#define MAX7359_AUTOSLEEP_4096 0x02 +#define MAX7359_AUTOSLEEP_2048 0x03 +#define MAX7359_AUTOSLEEP_1024 0x04 +#define MAX7359_AUTOSLEEP_512 0x05 +#define MAX7359_AUTOSLEEP_256 0x06 + +struct max7359_keypad { + /* matrix key code map */ + unsigned short keycodes[MAX7359_MAX_KEY_NUM]; + + struct input_dev *input_dev; + struct i2c_client *client; +}; + +static int max7359_write_reg(struct i2c_client *client, u8 reg, u8 val) +{ + int ret = i2c_smbus_write_byte_data(client, reg, val); + + if (ret < 0) + dev_err(&client->dev, "%s: reg 0x%x, val 0x%x, err %d\n", + __func__, reg, val, ret); + return ret; +} + +static int max7359_read_reg(struct i2c_client *client, int reg) +{ + int ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "%s: reg 0x%x, err %d\n", + __func__, reg, ret); + return ret; +} + +static void max7359_build_keycode(struct max7359_keypad *keypad, + const struct matrix_keymap_data *keymap_data) +{ + struct input_dev *input_dev = keypad->input_dev; + int i; + + for (i = 0; i < keymap_data->keymap_size; i++) { + unsigned int key = keymap_data->keymap[i]; + unsigned int row = KEY_ROW(key); + unsigned int col = KEY_COL(key); + unsigned int scancode = MATRIX_SCAN_CODE(row, col, + MAX7359_ROW_SHIFT); + unsigned short keycode = KEY_VAL(key); + + keypad->keycodes[scancode] = keycode; + __set_bit(keycode, input_dev->keybit); + } + __clear_bit(KEY_RESERVED, input_dev->keybit); +} + +/* runs in an IRQ thread -- can (and will!) sleep */ +static irqreturn_t max7359_interrupt(int irq, void *dev_id) +{ + struct max7359_keypad *keypad = dev_id; + struct input_dev *input_dev = keypad->input_dev; + int val, row, col, release, code; + + val = max7359_read_reg(keypad->client, MAX7359_REG_KEYFIFO); + row = val & 0x7; + col = (val >> 3) & 0x7; + release = val & 0x40; + + code = MATRIX_SCAN_CODE(row, col, MAX7359_ROW_SHIFT); + + dev_dbg(&keypad->client->dev, + "key[%d:%d] %s\n", row, col, release ? "release" : "press"); + + input_event(input_dev, EV_MSC, MSC_SCAN, code); + input_report_key(input_dev, keypad->keycodes[code], !release); + input_sync(input_dev); + + return IRQ_HANDLED; +} + +/* + * Let MAX7359 fall into a deep sleep: + * If no keys are pressed, enter sleep mode for 8192 ms. And if any + * key is pressed, the MAX7359 returns to normal operating mode. + */ +static inline void max7359_fall_deepsleep(struct i2c_client *client) +{ + max7359_write_reg(client, MAX7359_REG_SLEEP, MAX7359_AUTOSLEEP_8192); +} + +/* + * Let MAX7359 take a catnap: + * Autosleep just for 256 ms. + */ +static inline void max7359_take_catnap(struct i2c_client *client) +{ + max7359_write_reg(client, MAX7359_REG_SLEEP, MAX7359_AUTOSLEEP_256); +} + +static int max7359_open(struct input_dev *dev) +{ + struct max7359_keypad *keypad = input_get_drvdata(dev); + + max7359_take_catnap(keypad->client); + + return 0; +} + +static void max7359_close(struct input_dev *dev) +{ + struct max7359_keypad *keypad = input_get_drvdata(dev); + + max7359_fall_deepsleep(keypad->client); +} + +static void max7359_initialize(struct i2c_client *client) +{ + max7359_write_reg(client, MAX7359_REG_CONFIG, + MAX7359_CFG_INTERRUPT | /* Irq clears after host read */ + MAX7359_CFG_KEY_RELEASE | /* Key release enable */ + MAX7359_CFG_WAKEUP); /* Key press wakeup enable */ + + /* Full key-scan functionality */ + max7359_write_reg(client, MAX7359_REG_DEBOUNCE, 0x1F); + + /* nINT asserts every debounce cycles */ + max7359_write_reg(client, MAX7359_REG_INTERRUPT, 0x01); + + max7359_fall_deepsleep(client); +} + +static int __devinit max7359_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + const struct matrix_keymap_data *keymap_data = client->dev.platform_data; + struct max7359_keypad *keypad; + struct input_dev *input_dev; + int ret; + int error; + + if (!client->irq) { + dev_err(&client->dev, "The irq number should not be zero\n"); + return -EINVAL; + } + + /* Detect MAX7359: The initial Keys FIFO value is '0x3F' */ + ret = max7359_read_reg(client, MAX7359_REG_KEYFIFO); + if (ret < 0) { + dev_err(&client->dev, "failed to detect device\n"); + return -ENODEV; + } + + dev_dbg(&client->dev, "keys FIFO is 0x%02x\n", ret); + + keypad = kzalloc(sizeof(struct max7359_keypad), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!keypad || !input_dev) { + dev_err(&client->dev, "failed to allocate memory\n"); + error = -ENOMEM; + goto failed_free_mem; + } + + keypad->client = client; + keypad->input_dev = input_dev; + + input_dev->name = client->name; + input_dev->id.bustype = BUS_I2C; + input_dev->open = max7359_open; + input_dev->close = max7359_close; + input_dev->dev.parent = &client->dev; + + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); + input_dev->keycodesize = sizeof(keypad->keycodes[0]); + input_dev->keycodemax = ARRAY_SIZE(keypad->keycodes); + input_dev->keycode = keypad->keycodes; + + input_set_capability(input_dev, EV_MSC, MSC_SCAN); + input_set_drvdata(input_dev, keypad); + + max7359_build_keycode(keypad, keymap_data); + + error = request_threaded_irq(client->irq, NULL, max7359_interrupt, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + client->name, keypad); + if (error) { + dev_err(&client->dev, "failed to register interrupt\n"); + goto failed_free_mem; + } + + /* Register the input device */ + error = input_register_device(input_dev); + if (error) { + dev_err(&client->dev, "failed to register input device\n"); + goto failed_free_irq; + } + + /* Initialize MAX7359 */ + max7359_initialize(client); + + i2c_set_clientdata(client, keypad); + device_init_wakeup(&client->dev, 1); + + return 0; + +failed_free_irq: + free_irq(client->irq, keypad); +failed_free_mem: + input_free_device(input_dev); + kfree(keypad); + return error; +} + +static int __devexit max7359_remove(struct i2c_client *client) +{ + struct max7359_keypad *keypad = i2c_get_clientdata(client); + + free_irq(client->irq, keypad); + input_unregister_device(keypad->input_dev); + i2c_set_clientdata(client, NULL); + kfree(keypad); + + return 0; +} + +#ifdef CONFIG_PM +static int max7359_suspend(struct i2c_client *client, pm_message_t mesg) +{ + max7359_fall_deepsleep(client); + + if (device_may_wakeup(&client->dev)) + enable_irq_wake(client->irq); + + return 0; +} + +static int max7359_resume(struct i2c_client *client) +{ + if (device_may_wakeup(&client->dev)) + disable_irq_wake(client->irq); + + /* Restore the default setting */ + max7359_take_catnap(client); + + return 0; +} +#else +#define max7359_suspend NULL +#define max7359_resume NULL +#endif + +static const struct i2c_device_id max7359_ids[] = { + { "max7359", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max7359_ids); + +static struct i2c_driver max7359_i2c_driver = { + .driver = { + .name = "max7359", + }, + .probe = max7359_probe, + .remove = __devexit_p(max7359_remove), + .suspend = max7359_suspend, + .resume = max7359_resume, + .id_table = max7359_ids, +}; + +static int __init max7359_init(void) +{ + return i2c_add_driver(&max7359_i2c_driver); +} +module_init(max7359_init); + +static void __exit max7359_exit(void) +{ + i2c_del_driver(&max7359_i2c_driver); +} +module_exit(max7359_exit); + +MODULE_AUTHOR("Kim Kyuwon <q1.kim@samsung.com>"); +MODULE_DESCRIPTION("MAX7359 Key Switch Controller Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/input/keyboard/opencores-kbd.c b/drivers/input/keyboard/opencores-kbd.c new file mode 100644 index 00000000000..78cccddbf55 --- /dev/null +++ b/drivers/input/keyboard/opencores-kbd.c @@ -0,0 +1,180 @@ +/* + * OpenCores Keyboard Controller Driver + * http://www.opencores.org/project,keyboardcontroller + * + * Copyright 2007-2009 HV Sistemas S.L. + * + * Licensed under the GPL-2 or later. + */ + +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +struct opencores_kbd { + struct input_dev *input; + struct resource *addr_res; + void __iomem *addr; + int irq; + unsigned short keycodes[128]; +}; + +static irqreturn_t opencores_kbd_isr(int irq, void *dev_id) +{ + struct opencores_kbd *opencores_kbd = dev_id; + struct input_dev *input = opencores_kbd->input; + unsigned char c; + + c = readb(opencores_kbd->addr); + input_report_key(input, c & 0x7f, c & 0x80 ? 0 : 1); + input_sync(input); + + return IRQ_HANDLED; +} + +static int __devinit opencores_kbd_probe(struct platform_device *pdev) +{ + struct input_dev *input; + struct opencores_kbd *opencores_kbd; + struct resource *res; + int irq, i, error; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "missing board memory resource\n"); + return -EINVAL; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "missing board IRQ resource\n"); + return -EINVAL; + } + + opencores_kbd = kzalloc(sizeof(*opencores_kbd), GFP_KERNEL); + input = input_allocate_device(); + if (!opencores_kbd || !input) { + dev_err(&pdev->dev, "failed to allocate device structures\n"); + error = -ENOMEM; + goto err_free_mem; + } + + opencores_kbd->addr_res = res; + res = request_mem_region(res->start, resource_size(res), pdev->name); + if (!res) { + dev_err(&pdev->dev, "failed to request I/O memory\n"); + error = -EBUSY; + goto err_free_mem; + } + + opencores_kbd->addr = ioremap(res->start, resource_size(res)); + if (!opencores_kbd->addr) { + dev_err(&pdev->dev, "failed to remap I/O memory\n"); + error = -ENXIO; + goto err_rel_mem; + } + + opencores_kbd->input = input; + opencores_kbd->irq = irq; + + input->name = pdev->name; + input->phys = "opencores-kbd/input0"; + input->dev.parent = &pdev->dev; + + input_set_drvdata(input, opencores_kbd); + + input->id.bustype = BUS_HOST; + input->id.vendor = 0x0001; + input->id.product = 0x0001; + input->id.version = 0x0100; + + input->keycode = opencores_kbd->keycodes; + input->keycodesize = sizeof(opencores_kbd->keycodes[0]); + input->keycodemax = ARRAY_SIZE(opencores_kbd->keycodes); + + __set_bit(EV_KEY, input->evbit); + + for (i = 0; i < ARRAY_SIZE(opencores_kbd->keycodes); i++) { + /* + * OpenCores controller happens to have scancodes match + * our KEY_* definitions. + */ + opencores_kbd->keycodes[i] = i; + __set_bit(opencores_kbd->keycodes[i], input->keybit); + } + __clear_bit(KEY_RESERVED, input->keybit); + + error = request_irq(irq, &opencores_kbd_isr, + IRQF_TRIGGER_RISING, pdev->name, opencores_kbd); + if (error) { + dev_err(&pdev->dev, "unable to claim irq %d\n", irq); + goto err_unmap_mem; + } + + error = input_register_device(input); + if (error) { + dev_err(&pdev->dev, "unable to register input device\n"); + goto err_free_irq; + } + + platform_set_drvdata(pdev, opencores_kbd); + + return 0; + + err_free_irq: + free_irq(irq, opencores_kbd); + err_unmap_mem: + iounmap(opencores_kbd->addr); + err_rel_mem: + release_mem_region(res->start, resource_size(res)); + err_free_mem: + input_free_device(input); + kfree(opencores_kbd); + + return error; +} + +static int __devexit opencores_kbd_remove(struct platform_device *pdev) +{ + struct opencores_kbd *opencores_kbd = platform_get_drvdata(pdev); + + free_irq(opencores_kbd->irq, opencores_kbd); + + iounmap(opencores_kbd->addr); + release_mem_region(opencores_kbd->addr_res->start, + resource_size(opencores_kbd->addr_res)); + input_unregister_device(opencores_kbd->input); + kfree(opencores_kbd); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static struct platform_driver opencores_kbd_device_driver = { + .probe = opencores_kbd_probe, + .remove = __devexit_p(opencores_kbd_remove), + .driver = { + .name = "opencores-kbd", + }, +}; + +static int __init opencores_kbd_init(void) +{ + return platform_driver_register(&opencores_kbd_device_driver); +} +module_init(opencores_kbd_init); + +static void __exit opencores_kbd_exit(void) +{ + platform_driver_unregister(&opencores_kbd_device_driver); +} +module_exit(opencores_kbd_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Javier Herrero <jherrero@hvsistemas.es>"); +MODULE_DESCRIPTION("Keyboard driver for OpenCores Keyboard Controller"); diff --git a/drivers/input/keyboard/qt2160.c b/drivers/input/keyboard/qt2160.c new file mode 100644 index 00000000000..191cc51d6cf --- /dev/null +++ b/drivers/input/keyboard/qt2160.c @@ -0,0 +1,397 @@ +/* + * qt2160.c - Atmel AT42QT2160 Touch Sense Controller + * + * Copyright (C) 2009 Raphael Derosso Pereira <raphaelpereira@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/jiffies.h> +#include <linux/i2c.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/input.h> + +#define QT2160_VALID_CHIPID 0x11 + +#define QT2160_CMD_CHIPID 0 +#define QT2160_CMD_CODEVER 1 +#define QT2160_CMD_GSTAT 2 +#define QT2160_CMD_KEYS3 3 +#define QT2160_CMD_KEYS4 4 +#define QT2160_CMD_SLIDE 5 +#define QT2160_CMD_GPIOS 6 +#define QT2160_CMD_SUBVER 7 +#define QT2160_CMD_CALIBRATE 10 + +#define QT2160_CYCLE_INTERVAL (2*HZ) + +static unsigned char qt2160_key2code[] = { + KEY_0, KEY_1, KEY_2, KEY_3, + KEY_4, KEY_5, KEY_6, KEY_7, + KEY_8, KEY_9, KEY_A, KEY_B, + KEY_C, KEY_D, KEY_E, KEY_F, +}; + +struct qt2160_data { + struct i2c_client *client; + struct input_dev *input; + struct delayed_work dwork; + spinlock_t lock; /* Protects canceling/rescheduling of dwork */ + unsigned short keycodes[ARRAY_SIZE(qt2160_key2code)]; + u16 key_matrix; +}; + +static int qt2160_read_block(struct i2c_client *client, + u8 inireg, u8 *buffer, unsigned int count) +{ + int error, idx = 0; + + /* + * Can't use SMBus block data read. Check for I2C functionality to speed + * things up whenever possible. Otherwise we will be forced to read + * sequentially. + */ + if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + + error = i2c_smbus_write_byte(client, inireg + idx); + if (error) { + dev_err(&client->dev, + "couldn't send request. Returned %d\n", error); + return error; + } + + error = i2c_master_recv(client, buffer, count); + if (error != count) { + dev_err(&client->dev, + "couldn't read registers. Returned %d bytes\n", error); + return error; + } + } else { + + while (count--) { + int data; + + error = i2c_smbus_write_byte(client, inireg + idx); + if (error) { + dev_err(&client->dev, + "couldn't send request. Returned %d\n", error); + return error; + } + + data = i2c_smbus_read_byte(client); + if (data < 0) { + dev_err(&client->dev, + "couldn't read register. Returned %d\n", data); + return data; + } + + buffer[idx++] = data; + } + } + + return 0; +} + +static int qt2160_get_key_matrix(struct qt2160_data *qt2160) +{ + struct i2c_client *client = qt2160->client; + struct input_dev *input = qt2160->input; + u8 regs[6]; + u16 old_matrix, new_matrix; + int ret, i, mask; + + dev_dbg(&client->dev, "requesting keys...\n"); + + /* + * Read all registers from General Status Register + * to GPIOs register + */ + ret = qt2160_read_block(client, QT2160_CMD_GSTAT, regs, 6); + if (ret) { + dev_err(&client->dev, + "could not perform chip read.\n"); + return ret; + } + + old_matrix = qt2160->key_matrix; + qt2160->key_matrix = new_matrix = (regs[2] << 8) | regs[1]; + + mask = 0x01; + for (i = 0; i < 16; ++i, mask <<= 1) { + int keyval = new_matrix & mask; + + if ((old_matrix & mask) != keyval) { + input_report_key(input, qt2160->keycodes[i], keyval); + dev_dbg(&client->dev, "key %d %s\n", + i, keyval ? "pressed" : "released"); + } + } + + input_sync(input); + + return 0; +} + +static irqreturn_t qt2160_irq(int irq, void *_qt2160) +{ + struct qt2160_data *qt2160 = _qt2160; + unsigned long flags; + + spin_lock_irqsave(&qt2160->lock, flags); + + __cancel_delayed_work(&qt2160->dwork); + schedule_delayed_work(&qt2160->dwork, 0); + + spin_unlock_irqrestore(&qt2160->lock, flags); + + return IRQ_HANDLED; +} + +static void qt2160_schedule_read(struct qt2160_data *qt2160) +{ + spin_lock_irq(&qt2160->lock); + schedule_delayed_work(&qt2160->dwork, QT2160_CYCLE_INTERVAL); + spin_unlock_irq(&qt2160->lock); +} + +static void qt2160_worker(struct work_struct *work) +{ + struct qt2160_data *qt2160 = + container_of(work, struct qt2160_data, dwork.work); + + dev_dbg(&qt2160->client->dev, "worker\n"); + + qt2160_get_key_matrix(qt2160); + + /* Avoid device lock up by checking every so often */ + qt2160_schedule_read(qt2160); +} + +static int __devinit qt2160_read(struct i2c_client *client, u8 reg) +{ + int ret; + + ret = i2c_smbus_write_byte(client, reg); + if (ret) { + dev_err(&client->dev, + "couldn't send request. Returned %d\n", ret); + return ret; + } + + ret = i2c_smbus_read_byte(client); + if (ret < 0) { + dev_err(&client->dev, + "couldn't read register. Returned %d\n", ret); + return ret; + } + + return ret; +} + +static int __devinit qt2160_write(struct i2c_client *client, u8 reg, u8 data) +{ + int error; + + error = i2c_smbus_write_byte(client, reg); + if (error) { + dev_err(&client->dev, + "couldn't send request. Returned %d\n", error); + return error; + } + + error = i2c_smbus_write_byte(client, data); + if (error) { + dev_err(&client->dev, + "couldn't write data. Returned %d\n", error); + return error; + } + + return error; +} + + +static bool __devinit qt2160_identify(struct i2c_client *client) +{ + int id, ver, rev; + + /* Read Chid ID to check if chip is valid */ + id = qt2160_read(client, QT2160_CMD_CHIPID); + if (id != QT2160_VALID_CHIPID) { + dev_err(&client->dev, "ID %d not supported\n", id); + return false; + } + + /* Read chip firmware version */ + ver = qt2160_read(client, QT2160_CMD_CODEVER); + if (ver < 0) { + dev_err(&client->dev, "could not get firmware version\n"); + return false; + } + + /* Read chip firmware revision */ + rev = qt2160_read(client, QT2160_CMD_SUBVER); + if (rev < 0) { + dev_err(&client->dev, "could not get firmware revision\n"); + return false; + } + + dev_info(&client->dev, "AT42QT2160 firmware version %d.%d.%d\n", + ver >> 4, ver & 0xf, rev); + + return true; +} + +static int __devinit qt2160_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct qt2160_data *qt2160; + struct input_dev *input; + int i; + int error; + + /* Check functionality */ + error = i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE); + if (!error) { + dev_err(&client->dev, "%s adapter not supported\n", + dev_driver_string(&client->adapter->dev)); + return -ENODEV; + } + + if (!qt2160_identify(client)) + return -ENODEV; + + /* Chip is valid and active. Allocate structure */ + qt2160 = kzalloc(sizeof(struct qt2160_data), GFP_KERNEL); + input = input_allocate_device(); + if (!qt2160 || !input) { + dev_err(&client->dev, "insufficient memory\n"); + error = -ENOMEM; + goto err_free_mem; + } + + qt2160->client = client; + qt2160->input = input; + INIT_DELAYED_WORK(&qt2160->dwork, qt2160_worker); + spin_lock_init(&qt2160->lock); + + input->name = "AT42QT2160 Touch Sense Keyboard"; + input->id.bustype = BUS_I2C; + + input->keycode = qt2160->keycodes; + input->keycodesize = sizeof(qt2160->keycodes[0]); + input->keycodemax = ARRAY_SIZE(qt2160_key2code); + + __set_bit(EV_KEY, input->evbit); + __clear_bit(EV_REP, input->evbit); + for (i = 0; i < ARRAY_SIZE(qt2160_key2code); i++) { + qt2160->keycodes[i] = qt2160_key2code[i]; + __set_bit(qt2160_key2code[i], input->keybit); + } + __clear_bit(KEY_RESERVED, input->keybit); + + /* Calibrate device */ + error = qt2160_write(client, QT2160_CMD_CALIBRATE, 1); + if (error) { + dev_err(&client->dev, "failed to calibrate device\n"); + goto err_free_mem; + } + + if (client->irq) { + error = request_irq(client->irq, qt2160_irq, + IRQF_TRIGGER_FALLING, "qt2160", qt2160); + if (error) { + dev_err(&client->dev, + "failed to allocate irq %d\n", client->irq); + goto err_free_mem; + } + } + + error = input_register_device(qt2160->input); + if (error) { + dev_err(&client->dev, + "Failed to register input device\n"); + goto err_free_irq; + } + + i2c_set_clientdata(client, qt2160); + qt2160_schedule_read(qt2160); + + return 0; + +err_free_irq: + if (client->irq) + free_irq(client->irq, qt2160); +err_free_mem: + input_free_device(input); + kfree(qt2160); + return error; +} + +static int __devexit qt2160_remove(struct i2c_client *client) +{ + struct qt2160_data *qt2160 = i2c_get_clientdata(client); + + /* Release IRQ so no queue will be scheduled */ + if (client->irq) + free_irq(client->irq, qt2160); + + cancel_delayed_work_sync(&qt2160->dwork); + + input_unregister_device(qt2160->input); + kfree(qt2160); + + i2c_set_clientdata(client, NULL); + return 0; +} + +static struct i2c_device_id qt2160_idtable[] = { + { "qt2160", 0, }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, qt2160_idtable); + +static struct i2c_driver qt2160_driver = { + .driver = { + .name = "qt2160", + .owner = THIS_MODULE, + }, + + .id_table = qt2160_idtable, + .probe = qt2160_probe, + .remove = __devexit_p(qt2160_remove), +}; + +static int __init qt2160_init(void) +{ + return i2c_add_driver(&qt2160_driver); +} +module_init(qt2160_init); + +static void __exit qt2160_cleanup(void) +{ + i2c_del_driver(&qt2160_driver); +} +module_exit(qt2160_cleanup); + +MODULE_AUTHOR("Raphael Derosso Pereira <raphaelpereira@gmail.com>"); +MODULE_DESCRIPTION("Driver for AT42QT2160 Touch Sensor"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 76d6751f89a..02f4f8f1db6 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -225,6 +225,7 @@ config INPUT_SGI_BTNS config INPUT_WINBOND_CIR tristate "Winbond IR remote control" depends on X86 && PNP + select NEW_LEDS select LEDS_CLASS select BITREVERSE help diff --git a/drivers/input/misc/dm355evm_keys.c b/drivers/input/misc/dm355evm_keys.c index 0918acae584..f2b67dc81d8 100644 --- a/drivers/input/misc/dm355evm_keys.c +++ b/drivers/input/misc/dm355evm_keys.c @@ -96,7 +96,13 @@ static struct { { 0x3169, KEY_PAUSE, }, }; -/* runs in an IRQ thread -- can (and will!) sleep */ +/* + * Because we communicate with the MSP430 using I2C, and all I2C calls + * in Linux sleep, we use a threaded IRQ handler. The IRQ itself is + * active low, but we go through the GPIO controller so we can trigger + * on falling edges and not worry about enabling/disabling the IRQ in + * the keypress handling path. + */ static irqreturn_t dm355evm_keys_irq(int irq, void *_keys) { struct dm355evm_keys *keys = _keys; @@ -171,18 +177,6 @@ static irqreturn_t dm355evm_keys_irq(int irq, void *_keys) return IRQ_HANDLED; } -/* - * Because we communicate with the MSP430 using I2C, and all I2C calls - * in Linux sleep, we use a threaded IRQ handler. The IRQ itself is - * active low, but we go through the GPIO controller so we can trigger - * on falling edges and not worry about enabling/disabling the IRQ in - * the keypress handling path. - */ -static irqreturn_t dm355evm_keys_hardirq(int irq, void *_keys) -{ - return IRQ_WAKE_THREAD; -} - static int dm355evm_setkeycode(struct input_dev *dev, int index, int keycode) { u16 old_keycode; @@ -257,10 +251,8 @@ static int __devinit dm355evm_keys_probe(struct platform_device *pdev) /* REVISIT: flush the event queue? */ - status = request_threaded_irq(keys->irq, - dm355evm_keys_hardirq, dm355evm_keys_irq, - IRQF_TRIGGER_FALLING, - dev_name(&pdev->dev), keys); + status = request_threaded_irq(keys->irq, NULL, dm355evm_keys_irq, + IRQF_TRIGGER_FALLING, dev_name(&pdev->dev), keys); if (status < 0) goto fail1; diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c index 84e2fc04d11..f84cbd97c88 100644 --- a/drivers/input/mouse/sentelic.c +++ b/drivers/input/mouse/sentelic.c @@ -92,7 +92,8 @@ static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val) */ ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE); psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); - mutex_lock(&ps2dev->cmd_mutex); + + ps2_begin_command(ps2dev); if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) goto out; @@ -126,7 +127,7 @@ static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val) rc = 0; out: - mutex_unlock(&ps2dev->cmd_mutex); + ps2_end_command(ps2dev); ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE); psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); dev_dbg(&ps2dev->serio->dev, "READ REG: 0x%02x is 0x%02x (rc = %d)\n", @@ -140,7 +141,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val) unsigned char v; int rc = -1; - mutex_lock(&ps2dev->cmd_mutex); + ps2_begin_command(ps2dev); if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) goto out; @@ -179,7 +180,7 @@ static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val) rc = 0; out: - mutex_unlock(&ps2dev->cmd_mutex); + ps2_end_command(ps2dev); dev_dbg(&ps2dev->serio->dev, "WRITE REG: 0x%02x to 0x%02x (rc = %d)\n", reg_addr, reg_val, rc); return rc; @@ -214,7 +215,8 @@ static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val) ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE); psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); - mutex_lock(&ps2dev->cmd_mutex); + + ps2_begin_command(ps2dev); if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) goto out; @@ -236,7 +238,7 @@ static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val) rc = 0; out: - mutex_unlock(&ps2dev->cmd_mutex); + ps2_end_command(ps2dev); ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE); psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); dev_dbg(&ps2dev->serio->dev, "READ PAGE REG: 0x%02x (rc = %d)\n", @@ -250,7 +252,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val) unsigned char v; int rc = -1; - mutex_lock(&ps2dev->cmd_mutex); + ps2_begin_command(ps2dev); if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) goto out; @@ -275,7 +277,7 @@ static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val) rc = 0; out: - mutex_unlock(&ps2dev->cmd_mutex); + ps2_end_command(ps2dev); dev_dbg(&ps2dev->serio->dev, "WRITE PAGE REG: to 0x%02x (rc = %d)\n", reg_val, rc); return rc; diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c index eac9fdde7ee..7283c78044a 100644 --- a/drivers/input/mouse/synaptics_i2c.c +++ b/drivers/input/mouse/synaptics_i2c.c @@ -203,7 +203,7 @@ MODULE_PARM_DESC(no_filter, "No Filter. Default = 0 (off)"); * and the irq configuration should be set to Falling Edge Trigger */ /* Control IRQ / Polling option */ -static int polling_req; +static bool polling_req; module_param(polling_req, bool, 0444); MODULE_PARM_DESC(polling_req, "Request Polling. Default = 0 (use irq)"); @@ -217,6 +217,7 @@ struct synaptics_i2c { struct i2c_client *client; struct input_dev *input; struct delayed_work dwork; + spinlock_t lock; int no_data_count; int no_decel_param; int reduce_report_param; @@ -366,17 +367,28 @@ static bool synaptics_i2c_get_input(struct synaptics_i2c *touch) return xy_delta || gesture; } -static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id) +static void synaptics_i2c_reschedule_work(struct synaptics_i2c *touch, + unsigned long delay) { - struct synaptics_i2c *touch = dev_id; + unsigned long flags; + + spin_lock_irqsave(&touch->lock, flags); /* - * We want to have the work run immediately but it might have - * already been scheduled with a delay, that's why we have to - * cancel it first. + * If work is already scheduled then subsequent schedules will not + * change the scheduled time that's why we have to cancel it first. */ - cancel_delayed_work(&touch->dwork); - schedule_delayed_work(&touch->dwork, 0); + __cancel_delayed_work(&touch->dwork); + schedule_delayed_work(&touch->dwork, delay); + + spin_unlock_irqrestore(&touch->lock, flags); +} + +static irqreturn_t synaptics_i2c_irq(int irq, void *dev_id) +{ + struct synaptics_i2c *touch = dev_id; + + synaptics_i2c_reschedule_work(touch, 0); return IRQ_HANDLED; } @@ -452,7 +464,7 @@ static void synaptics_i2c_work_handler(struct work_struct *work) * We poll the device once in THREAD_IRQ_SLEEP_SECS and * if error is detected, we try to reset and reconfigure the touchpad. */ - schedule_delayed_work(&touch->dwork, delay); + synaptics_i2c_reschedule_work(touch, delay); } static int synaptics_i2c_open(struct input_dev *input) @@ -465,8 +477,8 @@ static int synaptics_i2c_open(struct input_dev *input) return ret; if (polling_req) - schedule_delayed_work(&touch->dwork, - msecs_to_jiffies(NO_DATA_SLEEP_MSECS)); + synaptics_i2c_reschedule_work(touch, + msecs_to_jiffies(NO_DATA_SLEEP_MSECS)); return 0; } @@ -521,6 +533,7 @@ struct synaptics_i2c *synaptics_i2c_touch_create(struct i2c_client *client) touch->scan_rate_param = scan_rate; set_scan_rate(touch, scan_rate); INIT_DELAYED_WORK(&touch->dwork, synaptics_i2c_work_handler); + spin_lock_init(&touch->lock); return touch; } @@ -535,14 +548,12 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client, if (!touch) return -ENOMEM; - i2c_set_clientdata(client, touch); - ret = synaptics_i2c_reset_config(client); if (ret) goto err_mem_free; if (client->irq < 1) - polling_req = 1; + polling_req = true; touch->input = input_allocate_device(); if (!touch->input) { @@ -563,7 +574,7 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client, dev_warn(&touch->client->dev, "IRQ request failed: %d, " "falling back to polling\n", ret); - polling_req = 1; + polling_req = true; synaptics_i2c_reg_set(touch->client, INTERRUPT_EN_REG, 0); } @@ -580,12 +591,14 @@ static int __devinit synaptics_i2c_probe(struct i2c_client *client, "Input device register failed: %d\n", ret); goto err_input_free; } + + i2c_set_clientdata(client, touch); + return 0; err_input_free: input_free_device(touch->input); err_mem_free: - i2c_set_clientdata(client, NULL); kfree(touch); return ret; @@ -596,7 +609,7 @@ static int __devexit synaptics_i2c_remove(struct i2c_client *client) struct synaptics_i2c *touch = i2c_get_clientdata(client); if (!polling_req) - free_irq(touch->client->irq, touch); + free_irq(client->irq, touch); input_unregister_device(touch->input); i2c_set_clientdata(client, NULL); @@ -627,8 +640,8 @@ static int synaptics_i2c_resume(struct i2c_client *client) if (ret) return ret; - schedule_delayed_work(&touch->dwork, - msecs_to_jiffies(NO_DATA_SLEEP_MSECS)); + synaptics_i2c_reschedule_work(touch, + msecs_to_jiffies(NO_DATA_SLEEP_MSECS)); return 0; } diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index eb3ff94af58..bc56e52b945 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -87,8 +87,22 @@ static bool i8042_bypass_aux_irq_test; #include "i8042.h" +/* + * i8042_lock protects serialization between i8042_command and + * the interrupt handler. + */ static DEFINE_SPINLOCK(i8042_lock); +/* + * Writers to AUX and KBD ports as well as users issuing i8042_command + * directly should acquire i8042_mutex (by means of calling + * i8042_lock_chip() and i8042_unlock_ship() helpers) to ensure that + * they do not disturb each other (unfortunately in many i8042 + * implementations write to one of the ports will immediately abort + * command that is being processed by another port). + */ +static DEFINE_MUTEX(i8042_mutex); + struct i8042_port { struct serio *serio; int irq; @@ -113,6 +127,18 @@ static struct platform_device *i8042_platform_device; static irqreturn_t i8042_interrupt(int irq, void *dev_id); +void i8042_lock_chip(void) +{ + mutex_lock(&i8042_mutex); +} +EXPORT_SYMBOL(i8042_lock_chip); + +void i8042_unlock_chip(void) +{ + mutex_unlock(&i8042_mutex); +} +EXPORT_SYMBOL(i8042_unlock_chip); + /* * The i8042_wait_read() and i8042_wait_write functions wait for the i8042 to * be ready for reading values from it / writing values to it. @@ -1161,6 +1187,21 @@ static void __devexit i8042_unregister_ports(void) } } +/* + * Checks whether port belongs to i8042 controller. + */ +bool i8042_check_port_owner(const struct serio *port) +{ + int i; + + for (i = 0; i < I8042_NUM_PORTS; i++) + if (i8042_ports[i].serio == port) + return true; + + return false; +} +EXPORT_SYMBOL(i8042_check_port_owner); + static void i8042_free_irqs(void) { if (i8042_aux_irq_registered) diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 3a95b508bf2..769ba65a585 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -17,6 +17,7 @@ #include <linux/interrupt.h> #include <linux/input.h> #include <linux/serio.h> +#include <linux/i8042.h> #include <linux/init.h> #include <linux/libps2.h> @@ -54,6 +55,24 @@ int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout) } EXPORT_SYMBOL(ps2_sendbyte); +void ps2_begin_command(struct ps2dev *ps2dev) +{ + mutex_lock(&ps2dev->cmd_mutex); + + if (i8042_check_port_owner(ps2dev->serio)) + i8042_lock_chip(); +} +EXPORT_SYMBOL(ps2_begin_command); + +void ps2_end_command(struct ps2dev *ps2dev) +{ + if (i8042_check_port_owner(ps2dev->serio)) + i8042_unlock_chip(); + + mutex_unlock(&ps2dev->cmd_mutex); +} +EXPORT_SYMBOL(ps2_end_command); + /* * ps2_drain() waits for device to transmit requested number of bytes * and discards them. @@ -66,7 +85,7 @@ void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout) maxbytes = sizeof(ps2dev->cmdbuf); } - mutex_lock(&ps2dev->cmd_mutex); + ps2_begin_command(ps2dev); serio_pause_rx(ps2dev->serio); ps2dev->flags = PS2_FLAG_CMD; @@ -76,7 +95,8 @@ void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout) wait_event_timeout(ps2dev->wait, !(ps2dev->flags & PS2_FLAG_CMD), msecs_to_jiffies(timeout)); - mutex_unlock(&ps2dev->cmd_mutex); + + ps2_end_command(ps2dev); } EXPORT_SYMBOL(ps2_drain); @@ -237,9 +257,9 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) { int rc; - mutex_lock(&ps2dev->cmd_mutex); + ps2_begin_command(ps2dev); rc = __ps2_command(ps2dev, param, command); - mutex_unlock(&ps2dev->cmd_mutex); + ps2_end_command(ps2dev); return rc; } diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index ab02d72afbf..8cc453c85ea 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -48,8 +48,8 @@ config TOUCHSCREEN_AD7879_I2C select TOUCHSCREEN_AD7879 help Say Y here if you have a touchscreen interface using the - AD7879-1 controller, and your board-specific initialization - code includes that in its table of I2C devices. + AD7879-1/AD7889-1 controller, and your board-specific + initialization code includes that in its table of I2C devices. If unsure, say N (but it's safe to say "Y"). @@ -62,7 +62,7 @@ config TOUCHSCREEN_AD7879_SPI select TOUCHSCREEN_AD7879 help Say Y here if you have a touchscreen interface using the - AD7879 controller, and your board-specific initialization + AD7879/AD7889 controller, and your board-specific initialization code includes that in its table of SPI devices. If unsure, say N (but it's safe to say "Y"). @@ -169,6 +169,17 @@ config TOUCHSCREEN_WACOM_W8001 To compile this driver as a module, choose M here: the module will be called wacom_w8001. +config TOUCHSCREEN_MCS5000 + tristate "MELFAS MCS-5000 touchscreen" + depends on I2C + help + Say Y here if you have the MELFAS MCS-5000 touchscreen controller + chip in your system. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called mcs5000_ts. config TOUCHSCREEN_MTOUCH tristate "MicroTouch serial touchscreens" diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index 4599bf7ad81..15fa62cffc7 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o obj-$(CONFIG_TOUCHSCREEN_ELO) += elo.o obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o +obj-$(CONFIG_TOUCHSCREEN_MCS5000) += mcs5000_ts.o obj-$(CONFIG_TOUCHSCREEN_MIGOR) += migor_ts.o obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o diff --git a/drivers/input/touchscreen/ad7879.c b/drivers/input/touchscreen/ad7879.c index 19b4db7e974..f06332c9e21 100644 --- a/drivers/input/touchscreen/ad7879.c +++ b/drivers/input/touchscreen/ad7879.c @@ -1,7 +1,8 @@ /* - * Copyright (C) 2008 Michael Hennerich, Analog Devices Inc. + * Copyright (C) 2008-2009 Michael Hennerich, Analog Devices Inc. * - * Description: AD7879 based touchscreen, and GPIO driver (I2C/SPI Interface) + * Description: AD7879/AD7889 based touchscreen, and GPIO driver + * (I2C/SPI Interface) * * Bugs: Enter bugs at http://blackfin.uclinux.org/ * @@ -747,6 +748,7 @@ static int __devexit ad7879_remove(struct i2c_client *client) static const struct i2c_device_id ad7879_id[] = { { "ad7879", 0 }, + { "ad7889", 0 }, { } }; MODULE_DEVICE_TABLE(i2c, ad7879_id); diff --git a/drivers/input/touchscreen/mcs5000_ts.c b/drivers/input/touchscreen/mcs5000_ts.c new file mode 100644 index 00000000000..4c28b89757f --- /dev/null +++ b/drivers/input/touchscreen/mcs5000_ts.c @@ -0,0 +1,318 @@ +/* + * mcs5000_ts.c - Touchscreen driver for MELFAS MCS-5000 controller + * + * Copyright (C) 2009 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim <jy0922.shim@samsung.com> + * + * Based on wm97xx-core.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/i2c.h> +#include <linux/i2c/mcs5000_ts.h> +#include <linux/interrupt.h> +#include <linux/input.h> +#include <linux/irq.h> + +/* Registers */ +#define MCS5000_TS_STATUS 0x00 +#define STATUS_OFFSET 0 +#define STATUS_NO (0 << STATUS_OFFSET) +#define STATUS_INIT (1 << STATUS_OFFSET) +#define STATUS_SENSING (2 << STATUS_OFFSET) +#define STATUS_COORD (3 << STATUS_OFFSET) +#define STATUS_GESTURE (4 << STATUS_OFFSET) +#define ERROR_OFFSET 4 +#define ERROR_NO (0 << ERROR_OFFSET) +#define ERROR_POWER_ON_RESET (1 << ERROR_OFFSET) +#define ERROR_INT_RESET (2 << ERROR_OFFSET) +#define ERROR_EXT_RESET (3 << ERROR_OFFSET) +#define ERROR_INVALID_REG_ADDRESS (8 << ERROR_OFFSET) +#define ERROR_INVALID_REG_VALUE (9 << ERROR_OFFSET) + +#define MCS5000_TS_OP_MODE 0x01 +#define RESET_OFFSET 0 +#define RESET_NO (0 << RESET_OFFSET) +#define RESET_EXT_SOFT (1 << RESET_OFFSET) +#define OP_MODE_OFFSET 1 +#define OP_MODE_SLEEP (0 << OP_MODE_OFFSET) +#define OP_MODE_ACTIVE (1 << OP_MODE_OFFSET) +#define GESTURE_OFFSET 4 +#define GESTURE_DISABLE (0 << GESTURE_OFFSET) +#define GESTURE_ENABLE (1 << GESTURE_OFFSET) +#define PROXIMITY_OFFSET 5 +#define PROXIMITY_DISABLE (0 << PROXIMITY_OFFSET) +#define PROXIMITY_ENABLE (1 << PROXIMITY_OFFSET) +#define SCAN_MODE_OFFSET 6 +#define SCAN_MODE_INTERRUPT (0 << SCAN_MODE_OFFSET) +#define SCAN_MODE_POLLING (1 << SCAN_MODE_OFFSET) +#define REPORT_RATE_OFFSET 7 +#define REPORT_RATE_40 (0 << REPORT_RATE_OFFSET) +#define REPORT_RATE_80 (1 << REPORT_RATE_OFFSET) + +#define MCS5000_TS_SENS_CTL 0x02 +#define MCS5000_TS_FILTER_CTL 0x03 +#define PRI_FILTER_OFFSET 0 +#define SEC_FILTER_OFFSET 4 + +#define MCS5000_TS_X_SIZE_UPPER 0x08 +#define MCS5000_TS_X_SIZE_LOWER 0x09 +#define MCS5000_TS_Y_SIZE_UPPER 0x0A +#define MCS5000_TS_Y_SIZE_LOWER 0x0B + +#define MCS5000_TS_INPUT_INFO 0x10 +#define INPUT_TYPE_OFFSET 0 +#define INPUT_TYPE_NONTOUCH (0 << INPUT_TYPE_OFFSET) +#define INPUT_TYPE_SINGLE (1 << INPUT_TYPE_OFFSET) +#define INPUT_TYPE_DUAL (2 << INPUT_TYPE_OFFSET) +#define INPUT_TYPE_PALM (3 << INPUT_TYPE_OFFSET) +#define INPUT_TYPE_PROXIMITY (7 << INPUT_TYPE_OFFSET) +#define GESTURE_CODE_OFFSET 3 +#define GESTURE_CODE_NO (0 << GESTURE_CODE_OFFSET) + +#define MCS5000_TS_X_POS_UPPER 0x11 +#define MCS5000_TS_X_POS_LOWER 0x12 +#define MCS5000_TS_Y_POS_UPPER 0x13 +#define MCS5000_TS_Y_POS_LOWER 0x14 +#define MCS5000_TS_Z_POS 0x15 +#define MCS5000_TS_WIDTH 0x16 +#define MCS5000_TS_GESTURE_VAL 0x17 +#define MCS5000_TS_MODULE_REV 0x20 +#define MCS5000_TS_FIRMWARE_VER 0x21 + +/* Touchscreen absolute values */ +#define MCS5000_MAX_XC 0x3ff +#define MCS5000_MAX_YC 0x3ff + +enum mcs5000_ts_read_offset { + READ_INPUT_INFO, + READ_X_POS_UPPER, + READ_X_POS_LOWER, + READ_Y_POS_UPPER, + READ_Y_POS_LOWER, + READ_BLOCK_SIZE, +}; + +/* Each client has this additional data */ +struct mcs5000_ts_data { + struct i2c_client *client; + struct input_dev *input_dev; + const struct mcs5000_ts_platform_data *platform_data; +}; + +static irqreturn_t mcs5000_ts_interrupt(int irq, void *dev_id) +{ + struct mcs5000_ts_data *data = dev_id; + struct i2c_client *client = data->client; + u8 buffer[READ_BLOCK_SIZE]; + int err; + int x; + int y; + + err = i2c_smbus_read_i2c_block_data(client, MCS5000_TS_INPUT_INFO, + READ_BLOCK_SIZE, buffer); + if (err < 0) { + dev_err(&client->dev, "%s, err[%d]\n", __func__, err); + goto out; + } + + switch (buffer[READ_INPUT_INFO]) { + case INPUT_TYPE_NONTOUCH: + input_report_key(data->input_dev, BTN_TOUCH, 0); + input_sync(data->input_dev); + break; + + case INPUT_TYPE_SINGLE: + x = (buffer[READ_X_POS_UPPER] << 8) | buffer[READ_X_POS_LOWER]; + y = (buffer[READ_Y_POS_UPPER] << 8) | buffer[READ_Y_POS_LOWER]; + + input_report_key(data->input_dev, BTN_TOUCH, 1); + input_report_abs(data->input_dev, ABS_X, x); + input_report_abs(data->input_dev, ABS_Y, y); + input_sync(data->input_dev); + break; + + case INPUT_TYPE_DUAL: + /* TODO */ + break; + + case INPUT_TYPE_PALM: + /* TODO */ + break; + + case INPUT_TYPE_PROXIMITY: + /* TODO */ + break; + + default: + dev_err(&client->dev, "Unknown ts input type %d\n", + buffer[READ_INPUT_INFO]); + break; + } + + out: + return IRQ_HANDLED; +} + +static void mcs5000_ts_phys_init(struct mcs5000_ts_data *data) +{ + const struct mcs5000_ts_platform_data *platform_data = + data->platform_data; + struct i2c_client *client = data->client; + + /* Touch reset & sleep mode */ + i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE, + RESET_EXT_SOFT | OP_MODE_SLEEP); + + /* Touch size */ + i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_UPPER, + platform_data->x_size >> 8); + i2c_smbus_write_byte_data(client, MCS5000_TS_X_SIZE_LOWER, + platform_data->x_size & 0xff); + i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_UPPER, + platform_data->y_size >> 8); + i2c_smbus_write_byte_data(client, MCS5000_TS_Y_SIZE_LOWER, + platform_data->y_size & 0xff); + + /* Touch active mode & 80 report rate */ + i2c_smbus_write_byte_data(data->client, MCS5000_TS_OP_MODE, + OP_MODE_ACTIVE | REPORT_RATE_80); +} + +static int __devinit mcs5000_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct mcs5000_ts_data *data; + struct input_dev *input_dev; + int ret; + + if (!client->dev.platform_data) + return -EINVAL; + + data = kzalloc(sizeof(struct mcs5000_ts_data), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!data || !input_dev) { + dev_err(&client->dev, "Failed to allocate memory\n"); + ret = -ENOMEM; + goto err_free_mem; + } + + data->client = client; + data->input_dev = input_dev; + data->platform_data = client->dev.platform_data; + + input_dev->name = "MELPAS MCS-5000 Touchscreen"; + input_dev->id.bustype = BUS_I2C; + input_dev->dev.parent = &client->dev; + + __set_bit(EV_ABS, input_dev->evbit); + __set_bit(EV_KEY, input_dev->evbit); + __set_bit(BTN_TOUCH, input_dev->keybit); + input_set_abs_params(input_dev, ABS_X, 0, MCS5000_MAX_XC, 0, 0); + input_set_abs_params(input_dev, ABS_Y, 0, MCS5000_MAX_YC, 0, 0); + + input_set_drvdata(input_dev, data); + + if (data->platform_data->cfg_pin) + data->platform_data->cfg_pin(); + + ret = request_threaded_irq(client->irq, NULL, mcs5000_ts_interrupt, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, "mcs5000_ts", data); + + if (ret < 0) { + dev_err(&client->dev, "Failed to register interrupt\n"); + goto err_free_mem; + } + + ret = input_register_device(data->input_dev); + if (ret < 0) + goto err_free_irq; + + mcs5000_ts_phys_init(data); + i2c_set_clientdata(client, data); + + return 0; + +err_free_irq: + free_irq(client->irq, data); +err_free_mem: + input_free_device(input_dev); + kfree(data); + return ret; +} + +static int __devexit mcs5000_ts_remove(struct i2c_client *client) +{ + struct mcs5000_ts_data *data = i2c_get_clientdata(client); + + free_irq(client->irq, data); + input_unregister_device(data->input_dev); + kfree(data); + i2c_set_clientdata(client, NULL); + + return 0; +} + +#ifdef CONFIG_PM +static int mcs5000_ts_suspend(struct i2c_client *client, pm_message_t mesg) +{ + /* Touch sleep mode */ + i2c_smbus_write_byte_data(client, MCS5000_TS_OP_MODE, OP_MODE_SLEEP); + + return 0; +} + +static int mcs5000_ts_resume(struct i2c_client *client) +{ + struct mcs5000_ts_data *data = i2c_get_clientdata(client); + + mcs5000_ts_phys_init(data); + + return 0; +} +#else +#define mcs5000_ts_suspend NULL +#define mcs5000_ts_resume NULL +#endif + +static const struct i2c_device_id mcs5000_ts_id[] = { + { "mcs5000_ts", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, mcs5000_ts_id); + +static struct i2c_driver mcs5000_ts_driver = { + .probe = mcs5000_ts_probe, + .remove = __devexit_p(mcs5000_ts_remove), + .suspend = mcs5000_ts_suspend, + .resume = mcs5000_ts_resume, + .driver = { + .name = "mcs5000_ts", + }, + .id_table = mcs5000_ts_id, +}; + +static int __init mcs5000_ts_init(void) +{ + return i2c_add_driver(&mcs5000_ts_driver); +} + +static void __exit mcs5000_ts_exit(void) +{ + i2c_del_driver(&mcs5000_ts_driver); +} + +module_init(mcs5000_ts_init); +module_exit(mcs5000_ts_exit); + +/* Module information */ +MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>"); +MODULE_DESCRIPTION("Touchscreen driver for MELFAS MCS-5000 controller"); +MODULE_LICENSE("GPL"); diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c index 1813c84ea5f..f2242db5401 100644 --- a/drivers/leds/leds-clevo-mail.c +++ b/drivers/leds/leds-clevo-mail.c @@ -93,6 +93,8 @@ static struct dmi_system_id __initdata mail_led_whitelist[] = { static void clevo_mail_led_set(struct led_classdev *led_cdev, enum led_brightness value) { + i8042_lock_chip(); + if (value == LED_OFF) i8042_command(NULL, CLEVO_MAIL_LED_OFF); else if (value <= LED_HALF) @@ -100,6 +102,8 @@ static void clevo_mail_led_set(struct led_classdev *led_cdev, else i8042_command(NULL, CLEVO_MAIL_LED_BLINK_1HZ); + i8042_unlock_chip(); + } static int clevo_mail_led_blink(struct led_classdev *led_cdev, @@ -108,6 +112,8 @@ static int clevo_mail_led_blink(struct led_classdev *led_cdev, { int status = -EINVAL; + i8042_lock_chip(); + if (*delay_on == 0 /* ms */ && *delay_off == 0 /* ms */) { /* Special case: the leds subsystem requested us to * chose one user friendly blinking of the LED, and @@ -135,6 +141,8 @@ static int clevo_mail_led_blink(struct led_classdev *led_cdev, *delay_on, *delay_off); } + i8042_unlock_chip(); + return status; } diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 020f9573fd8..2158377a135 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -124,6 +124,8 @@ config MD_RAID456 select MD_RAID6_PQ select ASYNC_MEMCPY select ASYNC_XOR + select ASYNC_PQ + select ASYNC_RAID6_RECOV ---help--- A RAID-5 set of N drives with a capacity of C MB per drive provides the capacity of C * (N - 1) MB, and protects against a failure @@ -152,9 +154,33 @@ config MD_RAID456 If unsure, say Y. +config MULTICORE_RAID456 + bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)" + depends on MD_RAID456 + depends on SMP + depends on EXPERIMENTAL + ---help--- + Enable the raid456 module to dispatch per-stripe raid operations to a + thread pool. + + If unsure, say N. + config MD_RAID6_PQ tristate +config ASYNC_RAID6_TEST + tristate "Self test for hardware accelerated raid6 recovery" + depends on MD_RAID6_PQ + select ASYNC_RAID6_RECOV + ---help--- + This is a one-shot self test that permutes through the + recovery of all the possible two disk failure scenarios for a + N-disk array. Recovery is performed with the asynchronous + raid6 recovery routines, and will optionally use an offload + engine if one is available. + + If unsure, say N. + config MD_MULTIPATH tristate "Multipath I/O support" depends on BLK_DEV_MD diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 3319c2fec28..6986b0059d2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -108,6 +108,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) * allocated while we're using it */ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) +__releases(bitmap->lock) +__acquires(bitmap->lock) { unsigned char *mappage; @@ -325,7 +327,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) return 0; bad_alignment: - rcu_read_unlock(); return -EINVAL; } @@ -1207,6 +1208,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, sector_t offset, int *blocks, int create) +__releases(bitmap->lock) +__acquires(bitmap->lock) { /* If 'create', we might release the lock and reclaim it. * The lock must have been taken with interrupts enabled. diff --git a/drivers/md/linear.c b/drivers/md/linear.c index ea484290544..1ceceb334d5 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -108,6 +108,9 @@ static int linear_congested(void *data, int bits) linear_conf_t *conf; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); conf = rcu_dereference(mddev->private); diff --git a/drivers/md/md.c b/drivers/md/md.c index 6aa497e4baf..26ba42a7912 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -262,6 +262,12 @@ static void mddev_resume(mddev_t *mddev) mddev->pers->quiesce(mddev, 0); } +int mddev_congested(mddev_t *mddev, int bits) +{ + return mddev->suspended; +} +EXPORT_SYMBOL(mddev_congested); + static inline mddev_t *mddev_get(mddev_t *mddev) { @@ -4218,7 +4224,7 @@ static int do_md_run(mddev_t * mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_resync"); + "resync"); if (!mddev->sync_thread) { printk(KERN_ERR "%s: could not start resync" " thread...\n", @@ -4575,10 +4581,10 @@ static int get_version(void __user * arg) static int get_array_info(mddev_t * mddev, void __user * arg) { mdu_array_info_t info; - int nr,working,active,failed,spare; + int nr,working,insync,failed,spare; mdk_rdev_t *rdev; - nr=working=active=failed=spare=0; + nr=working=insync=failed=spare=0; list_for_each_entry(rdev, &mddev->disks, same_set) { nr++; if (test_bit(Faulty, &rdev->flags)) @@ -4586,7 +4592,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) else { working++; if (test_bit(In_sync, &rdev->flags)) - active++; + insync++; else spare++; } @@ -4611,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.state = (1<<MD_SB_CLEAN); if (mddev->bitmap && mddev->bitmap_offset) info.state = (1<<MD_SB_BITMAP_PRESENT); - info.active_disks = active; + info.active_disks = insync; info.working_disks = working; info.failed_disks = failed; info.spare_disks = spare; @@ -4721,7 +4727,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) if (!list_empty(&mddev->disks)) { mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, mdk_rdev_t, same_set); - int err = super_types[mddev->major_version] + err = super_types[mddev->major_version] .load_super(rdev, rdev0, mddev->minor_version); if (err < 0) { printk(KERN_WARNING @@ -5631,7 +5637,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, thread->run = run; thread->mddev = mddev; thread->timeout = MAX_SCHEDULE_TIMEOUT; - thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev)); + thread->tsk = kthread_run(md_thread, thread, + "%s_%s", + mdname(thread->mddev), + name ?: mddev->pers->name); if (IS_ERR(thread->tsk)) { kfree(thread); return NULL; @@ -6745,7 +6754,7 @@ void md_check_recovery(mddev_t *mddev) } mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_resync"); + "resync"); if (!mddev->sync_thread) { printk(KERN_ERR "%s: could not start resync" " thread...\n", diff --git a/drivers/md/md.h b/drivers/md/md.h index f55d2ff9513..f184b69ef33 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -430,6 +430,7 @@ extern void md_write_end(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); +extern int mddev_congested(mddev_t *mddev, int bits); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); extern void md_super_wait(mddev_t *mddev); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d2d3fd54cc6..ee7646f974a 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -150,7 +150,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) } mp_bh = mempool_alloc(conf->pool, GFP_NOIO); - memset(mp_bh, 0, sizeof(*mp_bh)); mp_bh->master_bio = bio; mp_bh->mddev = mddev; @@ -199,6 +198,9 @@ static int multipath_congested(void *data, int bits) multipath_conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); for (i = 0; i < mddev->raid_disks ; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); @@ -504,7 +506,7 @@ static int multipath_run (mddev_t *mddev) } { - mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath"); + mddev->thread = md_register_thread(multipathd, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "multipath: couldn't allocate thread" " for %s\n", mdname(mddev)); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f845ed98fec..d3a4ce06015 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -44,6 +44,9 @@ static int raid0_congested(void *data, int bits) mdk_rdev_t **devlist = conf->devlist; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + for (i = 0; i < mddev->raid_disks && !ret ; i++) { struct request_queue *q = bdev_get_queue(devlist[i]->bdev); @@ -86,7 +89,7 @@ static void dump_zones(mddev_t *mddev) static int create_strip_zones(mddev_t *mddev) { - int i, c, j, err; + int i, c, err; sector_t curr_zone_end, sectors; mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; struct strip_zone *zone; @@ -198,6 +201,8 @@ static int create_strip_zones(mddev_t *mddev) /* now do the other zones */ for (i = 1; i < conf->nr_strip_zones; i++) { + int j; + zone = conf->strip_zone + i; dev = conf->devlist + i * mddev->raid_disks; @@ -207,7 +212,6 @@ static int create_strip_zones(mddev_t *mddev) c = 0; for (j=0; j<cnt; j++) { - char b[BDEVNAME_SIZE]; rdev = conf->devlist[j]; printk(KERN_INFO "raid0: checking %s ...", bdevname(rdev->bdev, b)); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ff7ed333599..d1b9bd5fd4f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits) conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; + rcu_read_lock(); for (i = 0; i < mddev->raid_disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); @@ -851,7 +854,7 @@ static int make_request(struct request_queue *q, struct bio * bio) read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid1_end_read_request; - read_bio->bi_rw = READ | do_sync; + read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); read_bio->bi_private = r1_bio; generic_make_request(read_bio); @@ -943,7 +946,8 @@ static int make_request(struct request_queue *q, struct bio * bio) mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_barriers | do_sync; + mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) | + (do_sync << BIO_RW_SYNCIO); mbio->bi_private = r1_bio; if (behind_pages) { @@ -1623,7 +1627,8 @@ static void raid1d(mddev_t *mddev) conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; bio->bi_end_io = raid1_end_write_request; - bio->bi_rw = WRITE | do_sync; + bio->bi_rw = WRITE | + (do_sync << BIO_RW_SYNCIO); bio->bi_private = r1_bio; r1_bio->bios[i] = bio; generic_make_request(bio); @@ -1672,7 +1677,7 @@ static void raid1d(mddev_t *mddev) bio->bi_sector = r1_bio->sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | do_sync; + bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); bio->bi_private = r1_bio; unplug = 1; generic_make_request(bio); @@ -2047,7 +2052,7 @@ static int run(mddev_t *mddev) conf->last_used = j; - mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); + mddev->thread = md_register_thread(raid1d, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "raid1: couldn't allocate thread for %s\n", diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d0a2152e064..51c4c5c4d87 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -631,6 +631,8 @@ static int raid10_congested(void *data, int bits) conf_t *conf = mddev->private; int i, ret = 0; + if (mddev_congested(mddev, bits)) + return 1; rcu_read_lock(); for (i = 0; i < mddev->raid_disks && ret == 0; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); @@ -882,7 +884,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | do_sync; + read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); read_bio->bi_private = r10_bio; generic_make_request(read_bio); @@ -950,7 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio) conf->mirrors[d].rdev->data_offset; mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync; + mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO); mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1623,7 +1625,7 @@ static void raid10d(mddev_t *mddev) bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; + bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); bio->bi_private = r10_bio; bio->bi_end_io = raid10_end_read_request; unplug = 1; @@ -1773,7 +1775,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { /* recovery... the complicated one */ - int i, j, k; + int j, k; r10_bio = NULL; for (i=0 ; i<conf->raid_disks; i++) @@ -2188,7 +2190,7 @@ static int run(mddev_t *mddev) } - mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10"); + mddev->thread = md_register_thread(raid10d, mddev, NULL); if (!mddev->thread) { printk(KERN_ERR "raid10: couldn't allocate thread for %s\n", diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 826eb346735..94829804ab7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -47,7 +47,9 @@ #include <linux/kthread.h> #include <linux/raid/pq.h> #include <linux/async_tx.h> +#include <linux/async.h> #include <linux/seq_file.h> +#include <linux/cpu.h> #include "md.h" #include "raid5.h" #include "bitmap.h" @@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, struct page *bio_page; int i; int page_offset; + struct async_submit_ctl submit; + enum async_tx_flags flags = 0; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; + + if (frombio) + flags |= ASYNC_TX_FENCE; + init_async_submit(&submit, flags, tx, NULL, NULL, NULL); + bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, &submit); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, &submit); } + /* chain the operations */ + submit.depend_tx = tx; + if (clen < len) /* hit end of page */ break; page_offset += len; @@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh) { struct dma_async_tx_descriptor *tx = NULL; raid5_conf_t *conf = sh->raid_conf; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu\n", __func__, @@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); } -static void ops_complete_compute5(void *stripe_head_ref) +static void mark_target_uptodate(struct stripe_head *sh, int target) { - struct stripe_head *sh = stripe_head_ref; - int target = sh->ops.target; - struct r5dev *tgt = &sh->dev[target]; + struct r5dev *tgt; - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); + if (target < 0) + return; + tgt = &sh->dev[target]; set_bit(R5_UPTODATE, &tgt->flags); BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); clear_bit(R5_Wantcompute, &tgt->flags); +} + +static void ops_complete_compute(void *stripe_head_ref) +{ + struct stripe_head *sh = stripe_head_ref; + + pr_debug("%s: stripe %llu\n", __func__, + (unsigned long long)sh->sector); + + /* mark the computed target(s) as uptodate */ + mark_target_uptodate(sh, sh->ops.target); + mark_target_uptodate(sh, sh->ops.target2); + clear_bit(STRIPE_COMPUTE_RUN, &sh->state); if (sh->check_state == check_state_compute_run) sh->check_state = check_state_compute_result; @@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref) release_stripe(sh); } -static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) +/* return a pointer to the address conversion region of the scribble buffer */ +static addr_conv_t *to_addr_conv(struct stripe_head *sh, + struct raid5_percpu *percpu) +{ + return percpu->scribble + sizeof(struct page *) * (sh->disks + 2); +} + +static struct dma_async_tx_descriptor * +ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; + struct page **xor_srcs = percpu->scribble; int target = sh->ops.target; struct r5dev *tgt = &sh->dev[target]; struct page *xor_dest = tgt->page; int count = 0; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu block: %d\n", @@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute, sh, to_addr_conv(sh, percpu)); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } +/* set_syndrome_sources - populate source buffers for gen_syndrome + * @srcs - (struct page *) array of size sh->disks + * @sh - stripe_head to parse + * + * Populates srcs in proper layout order for the stripe and returns the + * 'count' of sources to be used in a call to async_gen_syndrome. The P + * destination buffer is recorded in srcs[count] and the Q destination + * is recorded in srcs[count+1]]. + */ +static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) +{ + int disks = sh->disks; + int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); + int d0_idx = raid6_d0(sh); + int count; + int i; + + for (i = 0; i < disks; i++) + srcs[i] = (void *)raid6_empty_zero_page; + + count = 0; + i = d0_idx; + do { + int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); + + srcs[slot] = sh->dev[i].page; + i = raid6_next_disk(i, disks); + } while (i != d0_idx); + BUG_ON(count != syndrome_disks); + + return count; +} + +static struct dma_async_tx_descriptor * +ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) +{ + int disks = sh->disks; + struct page **blocks = percpu->scribble; + int target; + int qd_idx = sh->qd_idx; + struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; + struct r5dev *tgt; + struct page *dest; + int i; + int count; + + if (sh->ops.target < 0) + target = sh->ops.target2; + else if (sh->ops.target2 < 0) + target = sh->ops.target; + else + /* we should only have one valid target */ + BUG(); + BUG_ON(target < 0); + pr_debug("%s: stripe %llu block: %d\n", + __func__, (unsigned long long)sh->sector, target); + + tgt = &sh->dev[target]; + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); + dest = tgt->page; + + atomic_inc(&sh->count); + + if (target == qd_idx) { + count = set_syndrome_sources(blocks, sh); + blocks[count] = NULL; /* regenerating p is not necessary */ + BUG_ON(blocks[count+1] != dest); /* q should already be set */ + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); + } else { + /* Compute any data- or p-drive using XOR */ + count = 0; + for (i = disks; i-- ; ) { + if (i == target || i == qd_idx) + continue; + blocks[count++] = sh->dev[i].page; + } + + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); + } + + return tx; +} + +static struct dma_async_tx_descriptor * +ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) +{ + int i, count, disks = sh->disks; + int syndrome_disks = sh->ddf_layout ? disks : disks-2; + int d0_idx = raid6_d0(sh); + int faila = -1, failb = -1; + int target = sh->ops.target; + int target2 = sh->ops.target2; + struct r5dev *tgt = &sh->dev[target]; + struct r5dev *tgt2 = &sh->dev[target2]; + struct dma_async_tx_descriptor *tx; + struct page **blocks = percpu->scribble; + struct async_submit_ctl submit; + + pr_debug("%s: stripe %llu block1: %d block2: %d\n", + __func__, (unsigned long long)sh->sector, target, target2); + BUG_ON(target < 0 || target2 < 0); + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); + BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); + + /* we need to open-code set_syndrome_sources to handle the + * slot number conversion for 'faila' and 'failb' + */ + for (i = 0; i < disks ; i++) + blocks[i] = (void *)raid6_empty_zero_page; + count = 0; + i = d0_idx; + do { + int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); + + blocks[slot] = sh->dev[i].page; + + if (i == target) + faila = slot; + if (i == target2) + failb = slot; + i = raid6_next_disk(i, disks); + } while (i != d0_idx); + BUG_ON(count != syndrome_disks); + + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + pr_debug("%s: stripe: %llu faila: %d failb: %d\n", + __func__, (unsigned long long)sh->sector, faila, failb); + + atomic_inc(&sh->count); + + if (failb == syndrome_disks+1) { + /* Q disk is one of the missing disks */ + if (faila == syndrome_disks) { + /* Missing P+Q, just recompute */ + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + return async_gen_syndrome(blocks, 0, count+2, + STRIPE_SIZE, &submit); + } else { + struct page *dest; + int data_target; + int qd_idx = sh->qd_idx; + + /* Missing D+Q: recompute D from P, then recompute Q */ + if (target == qd_idx) + data_target = target2; + else + data_target = target; + + count = 0; + for (i = disks; i-- ; ) { + if (i == data_target || i == qd_idx) + continue; + blocks[count++] = sh->dev[i].page; + } + dest = sh->dev[data_target].page; + init_async_submit(&submit, + ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, + NULL, NULL, NULL, + to_addr_conv(sh, percpu)); + tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, + &submit); + + count = set_syndrome_sources(blocks, sh); + init_async_submit(&submit, ASYNC_TX_FENCE, tx, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + return async_gen_syndrome(blocks, 0, count+2, + STRIPE_SIZE, &submit); + } + } else { + init_async_submit(&submit, ASYNC_TX_FENCE, NULL, + ops_complete_compute, sh, + to_addr_conv(sh, percpu)); + if (failb == syndrome_disks) { + /* We're missing D+P. */ + return async_raid6_datap_recov(syndrome_disks+2, + STRIPE_SIZE, faila, + blocks, &submit); + } else { + /* We're missing D+D. */ + return async_raid6_2data_recov(syndrome_disks+2, + STRIPE_SIZE, faila, failb, + blocks, &submit); + } + } +} + + static void ops_complete_prexor(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; @@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref) } static struct dma_async_tx_descriptor * -ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; + struct page **xor_srcs = percpu->scribble; int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; /* existing parity data subtracted */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) xor_srcs[count++] = dev->page; } - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh, to_addr_conv(sh, percpu)); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) return tx; } -static void ops_complete_postxor(void *stripe_head_ref) +static void ops_complete_reconstruct(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; - int disks = sh->disks, i, pd_idx = sh->pd_idx; + int disks = sh->disks; + int pd_idx = sh->pd_idx; + int qd_idx = sh->qd_idx; + int i; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (dev->written || i == pd_idx) + + if (dev->written || i == pd_idx || i == qd_idx) set_bit(R5_UPTODATE, &dev->flags); } @@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref) } static void -ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) +ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; - + struct page **xor_srcs = percpu->scribble; + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; int prexor = 0; @@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); - if (unlikely(count == 1)) { - flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); - } else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); + init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh, + to_addr_conv(sh, percpu)); + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); +} + +static void +ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, + struct dma_async_tx_descriptor *tx) +{ + struct async_submit_ctl submit; + struct page **blocks = percpu->scribble; + int count; + + pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); + + count = set_syndrome_sources(blocks, sh); + + atomic_inc(&sh->count); + + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct, + sh, to_addr_conv(sh, percpu)); + async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); } static void ops_complete_check(void *stripe_head_ref) @@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref) release_stripe(sh); } -static void ops_run_check(struct stripe_head *sh) +static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) { - /* kernel stack size limits the total number of disks */ int disks = sh->disks; - struct page *xor_srcs[disks]; + int pd_idx = sh->pd_idx; + int qd_idx = sh->qd_idx; + struct page *xor_dest; + struct page **xor_srcs = percpu->scribble; struct dma_async_tx_descriptor *tx; - - int count = 0, pd_idx = sh->pd_idx, i; - struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; + struct async_submit_ctl submit; + int count; + int i; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); + count = 0; + xor_dest = sh->dev[pd_idx].page; + xor_srcs[count++] = xor_dest; for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (i != pd_idx) - xor_srcs[count++] = dev->page; + if (i == pd_idx || i == qd_idx) + continue; + xor_srcs[count++] = sh->dev[i].page; } - tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + init_async_submit(&submit, 0, NULL, NULL, NULL, + to_addr_conv(sh, percpu)); + tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + &sh->ops.zero_sum_result, &submit); + + atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); +} + +static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) +{ + struct page **srcs = percpu->scribble; + struct async_submit_ctl submit; + int count; + + pr_debug("%s: stripe %llu checkp: %d\n", __func__, + (unsigned long long)sh->sector, checkp); + + count = set_syndrome_sources(srcs, sh); + if (!checkp) + srcs[count] = NULL; atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_check, sh); + init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, + sh, to_addr_conv(sh, percpu)); + async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE, + &sh->ops.zero_sum_result, percpu->spare_page, &submit); } -static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) +static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) { int overlap_clear = 0, i, disks = sh->disks; struct dma_async_tx_descriptor *tx = NULL; + raid5_conf_t *conf = sh->raid_conf; + int level = conf->level; + struct raid5_percpu *percpu; + unsigned long cpu; + cpu = get_cpu(); + percpu = per_cpu_ptr(conf->percpu, cpu); if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; } if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { - tx = ops_run_compute5(sh); - /* terminate the chain if postxor is not set to be run */ - if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) + if (level < 6) + tx = ops_run_compute5(sh, percpu); + else { + if (sh->ops.target2 < 0 || sh->ops.target < 0) + tx = ops_run_compute6_1(sh, percpu); + else + tx = ops_run_compute6_2(sh, percpu); + } + /* terminate the chain if reconstruct is not set to be run */ + if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) async_tx_ack(tx); } if (test_bit(STRIPE_OP_PREXOR, &ops_request)) - tx = ops_run_prexor(sh, tx); + tx = ops_run_prexor(sh, percpu, tx); if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { tx = ops_run_biodrain(sh, tx); overlap_clear++; } - if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) - ops_run_postxor(sh, tx); + if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) { + if (level < 6) + ops_run_reconstruct5(sh, percpu, tx); + else + ops_run_reconstruct6(sh, percpu, tx); + } - if (test_bit(STRIPE_OP_CHECK, &ops_request)) - ops_run_check(sh); + if (test_bit(STRIPE_OP_CHECK, &ops_request)) { + if (sh->check_state == check_state_run) + ops_run_check_p(sh, percpu); + else if (sh->check_state == check_state_run_q) + ops_run_check_pq(sh, percpu, 0); + else if (sh->check_state == check_state_run_pq) + ops_run_check_pq(sh, percpu, 1); + else + BUG(); + } if (overlap_clear) for (i = disks; i--; ) { @@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } + put_cpu(); } static int grow_one_stripe(raid5_conf_t *conf) @@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num) return 0; } +/** + * scribble_len - return the required size of the scribble region + * @num - total number of disks in the array + * + * The size must be enough to contain: + * 1/ a struct page pointer for each device in the array +2 + * 2/ room to convert each entry in (1) to its corresponding dma + * (dma_map_page()) or page (page_address()) address. + * + * Note: the +2 is for the destination buffers of the ddf/raid6 case where we + * calculate over all devices (not just the data blocks), using zeros in place + * of the P and Q blocks. + */ +static size_t scribble_len(int num) +{ + size_t len; + + len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); + + return len; +} + static int resize_stripes(raid5_conf_t *conf, int newsize) { /* Make all the stripes able to hold 'newsize' devices. @@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) struct stripe_head *osh, *nsh; LIST_HEAD(newstripes); struct disk_info *ndisks; + unsigned long cpu; int err; struct kmem_cache *sc; int i; @@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) /* Step 3. * At this point, we are holding all the stripes so the array * is completely stalled, so now is a good time to resize - * conf->disks. + * conf->disks and the scribble region */ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); if (ndisks) { @@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) } else err = -ENOMEM; + get_online_cpus(); + conf->scribble_len = scribble_len(newsize); + for_each_present_cpu(cpu) { + struct raid5_percpu *percpu; + void *scribble; + + percpu = per_cpu_ptr(conf->percpu, cpu); + scribble = kmalloc(conf->scribble_len, GFP_NOIO); + + if (scribble) { + kfree(percpu->scribble); + percpu->scribble = scribble; + } else { + err = -ENOMEM; + break; + } + } + put_online_cpus(); + /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); list_del_init(&nsh->lru); + for (i=conf->raid_disks; i < newsize; i++) if (nsh->dev[i].page == NULL) { struct page *p = alloc_page(GFP_NOIO); @@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) } - -/* - * Copy data between a page in the stripe cache, and one or more bion - * The page could align with the middle of the bio, or there could be - * several bion, each with several bio_vecs, which cover part of the page - * Multiple bion are linked together on bi_next. There may be extras - * at the end of this list. We ignore them. - */ -static void copy_data(int frombio, struct bio *bio, - struct page *page, - sector_t sector) -{ - char *pa = page_address(page); - struct bio_vec *bvl; - int i; - int page_offset; - - if (bio->bi_sector >= sector) - page_offset = (signed)(bio->bi_sector - sector) * 512; - else - page_offset = (signed)(sector - bio->bi_sector) * -512; - bio_for_each_segment(bvl, bio, i) { - int len = bio_iovec_idx(bio,i)->bv_len; - int clen; - int b_offset = 0; - - if (page_offset < 0) { - b_offset = -page_offset; - page_offset += b_offset; - len -= b_offset; - } - - if (len > 0 && page_offset + len > STRIPE_SIZE) - clen = STRIPE_SIZE - page_offset; - else clen = len; - - if (clen > 0) { - char *ba = __bio_kmap_atomic(bio, i, KM_USER0); - if (frombio) - memcpy(pa+page_offset, ba+b_offset, clen); - else - memcpy(ba+b_offset, pa+page_offset, clen); - __bio_kunmap_atomic(ba, KM_USER0); - } - if (clen < len) /* hit end of page */ - break; - page_offset += len; - } -} - -#define check_xor() do { \ - if (count == MAX_XOR_BLOCKS) { \ - xor_blocks(count, STRIPE_SIZE, dest, ptr);\ - count = 0; \ - } \ - } while(0) - -static void compute_parity6(struct stripe_head *sh, int method) -{ - raid5_conf_t *conf = sh->raid_conf; - int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; - int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); - struct bio *chosen; - /**** FIX THIS: This could be very bad if disks is close to 256 ****/ - void *ptrs[syndrome_disks+2]; - - pd_idx = sh->pd_idx; - qd_idx = sh->qd_idx; - d0_idx = raid6_d0(sh); - - pr_debug("compute_parity, stripe %llu, method %d\n", - (unsigned long long)sh->sector, method); - - switch(method) { - case READ_MODIFY_WRITE: - BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */ - case RECONSTRUCT_WRITE: - for (i= disks; i-- ;) - if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) { - chosen = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - - BUG_ON(sh->dev[i].written); - sh->dev[i].written = chosen; - } - break; - case CHECK_PARITY: - BUG(); /* Not implemented yet */ - } - - for (i = disks; i--;) - if (sh->dev[i].written) { - sector_t sector = sh->dev[i].sector; - struct bio *wbi = sh->dev[i].written; - while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { - copy_data(1, wbi, sh->dev[i].page, sector); - wbi = r5_next_bio(wbi, sector); - } - - set_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(R5_UPTODATE, &sh->dev[i].flags); - } - - /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ - - for (i = 0; i < disks; i++) - ptrs[i] = (void *)raid6_empty_zero_page; - - count = 0; - i = d0_idx; - do { - int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); - - ptrs[slot] = page_address(sh->dev[i].page); - if (slot < syndrome_disks && - !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { - printk(KERN_ERR "block %d/%d not uptodate " - "on parity calc\n", i, count); - BUG(); - } - - i = raid6_next_disk(i, disks); - } while (i != d0_idx); - BUG_ON(count != syndrome_disks); - - raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs); - - switch(method) { - case RECONSTRUCT_WRITE: - set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); - set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); - set_bit(R5_LOCKED, &sh->dev[qd_idx].flags); - break; - case UPDATE_PARITY: - set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); - break; - } -} - - -/* Compute one missing block */ -static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) -{ - int i, count, disks = sh->disks; - void *ptr[MAX_XOR_BLOCKS], *dest, *p; - int qd_idx = sh->qd_idx; - - pr_debug("compute_block_1, stripe %llu, idx %d\n", - (unsigned long long)sh->sector, dd_idx); - - if ( dd_idx == qd_idx ) { - /* We're actually computing the Q drive */ - compute_parity6(sh, UPDATE_PARITY); - } else { - dest = page_address(sh->dev[dd_idx].page); - if (!nozero) memset(dest, 0, STRIPE_SIZE); - count = 0; - for (i = disks ; i--; ) { - if (i == dd_idx || i == qd_idx) - continue; - p = page_address(sh->dev[i].page); - if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) - ptr[count++] = p; - else - printk("compute_block() %d, stripe %llu, %d" - " not present\n", dd_idx, - (unsigned long long)sh->sector, i); - - check_xor(); - } - if (count) - xor_blocks(count, STRIPE_SIZE, dest, ptr); - if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); - else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); - } -} - -/* Compute two missing blocks */ -static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) -{ - int i, count, disks = sh->disks; - int syndrome_disks = sh->ddf_layout ? disks : disks-2; - int d0_idx = raid6_d0(sh); - int faila = -1, failb = -1; - /**** FIX THIS: This could be very bad if disks is close to 256 ****/ - void *ptrs[syndrome_disks+2]; - - for (i = 0; i < disks ; i++) - ptrs[i] = (void *)raid6_empty_zero_page; - count = 0; - i = d0_idx; - do { - int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); - - ptrs[slot] = page_address(sh->dev[i].page); - - if (i == dd_idx1) - faila = slot; - if (i == dd_idx2) - failb = slot; - i = raid6_next_disk(i, disks); - } while (i != d0_idx); - BUG_ON(count != syndrome_disks); - - BUG_ON(faila == failb); - if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } - - pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", - (unsigned long long)sh->sector, dd_idx1, dd_idx2, - faila, failb); - - if (failb == syndrome_disks+1) { - /* Q disk is one of the missing disks */ - if (faila == syndrome_disks) { - /* Missing P+Q, just recompute */ - compute_parity6(sh, UPDATE_PARITY); - return; - } else { - /* We're missing D+Q; recompute D from P */ - compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ? - dd_idx2 : dd_idx1), - 0); - compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ - return; - } - } - - /* We're missing D+P or D+D; */ - if (failb == syndrome_disks) { - /* We're missing D+P. */ - raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs); - } else { - /* We're missing D+D. */ - raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb, - ptrs); - } - - /* Both the above update both missing blocks */ - set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); - set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); -} - static void -schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, +schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, int rcw, int expand) { int i, pd_idx = sh->pd_idx, disks = sh->disks; + raid5_conf_t *conf = sh->raid_conf; + int level = conf->level; if (rcw) { /* if we are not expanding this is a proper write request, and @@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, } else sh->reconstruct_state = reconstruct_state_run; - set_bit(STRIPE_OP_POSTXOR, &s->ops_request); + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, s->locked++; } } - if (s->locked + 1 == disks) + if (s->locked + conf->max_degraded == disks) if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) - atomic_inc(&sh->raid_conf->pending_full_writes); + atomic_inc(&conf->pending_full_writes); } else { + BUG_ON(level == 6); BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); sh->reconstruct_state = reconstruct_state_prexor_drain_run; set_bit(STRIPE_OP_PREXOR, &s->ops_request); set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - set_bit(STRIPE_OP_POSTXOR, &s->ops_request); + set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; @@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, } } - /* keep the parity disk locked while asynchronous operations + /* keep the parity disk(s) locked while asynchronous operations * are in flight */ set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); s->locked++; + if (level == 6) { + int qd_idx = sh->qd_idx; + struct r5dev *dev = &sh->dev[qd_idx]; + + set_bit(R5_LOCKED, &dev->flags); + clear_bit(R5_UPTODATE, &dev->flags); + s->locked++; + } + pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", __func__, (unsigned long long)sh->sector, s->locked, s->ops_request); @@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in static void end_reshape(raid5_conf_t *conf); -static int page_is_zero(struct page *p) -{ - char *a = page_address(p); - return ((*(u32*)a) == 0 && - memcmp(a, a+4, STRIPE_SIZE-4)==0); -} - static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, struct stripe_head *sh) { @@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); set_bit(R5_Wantcompute, &dev->flags); sh->ops.target = disk_idx; + sh->ops.target2 = -1; s->req_compute = 1; /* Careful: from this point on 'uptodate' is in the eye - * of raid5_run_ops which services 'compute' operations + * of raid_run_ops which services 'compute' operations * before writes. R5_Wantcompute flags a block that will * be R5_UPTODATE by the time it is needed for a * subsequent operation. @@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh, set_bit(STRIPE_HANDLE, &sh->state); } -static void handle_stripe_fill6(struct stripe_head *sh, - struct stripe_head_state *s, struct r6_state *r6s, - int disks) +/* fetch_block6 - checks the given member device to see if its data needs + * to be read or computed to satisfy a request. + * + * Returns 1 when no more member devices need to be checked, otherwise returns + * 0 to tell the loop in handle_stripe_fill6 to continue + */ +static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, + struct r6_state *r6s, int disk_idx, int disks) { - int i; - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (!test_bit(R5_LOCKED, &dev->flags) && - !test_bit(R5_UPTODATE, &dev->flags) && - (dev->toread || (dev->towrite && - !test_bit(R5_OVERWRITE, &dev->flags)) || - s->syncing || s->expanding || - (s->failed >= 1 && - (sh->dev[r6s->failed_num[0]].toread || - s->to_write)) || - (s->failed >= 2 && - (sh->dev[r6s->failed_num[1]].toread || - s->to_write)))) { - /* we would like to get this block, possibly - * by computing it, but we might not be able to + struct r5dev *dev = &sh->dev[disk_idx]; + struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]], + &sh->dev[r6s->failed_num[1]] }; + + if (!test_bit(R5_LOCKED, &dev->flags) && + !test_bit(R5_UPTODATE, &dev->flags) && + (dev->toread || + (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || + s->syncing || s->expanding || + (s->failed >= 1 && + (fdev[0]->toread || s->to_write)) || + (s->failed >= 2 && + (fdev[1]->toread || s->to_write)))) { + /* we would like to get this block, possibly by computing it, + * otherwise read it if the backing disk is insync + */ + BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); + BUG_ON(test_bit(R5_Wantread, &dev->flags)); + if ((s->uptodate == disks - 1) && + (s->failed && (disk_idx == r6s->failed_num[0] || + disk_idx == r6s->failed_num[1]))) { + /* have disk failed, and we're requested to fetch it; + * do compute it */ - if ((s->uptodate == disks - 1) && - (s->failed && (i == r6s->failed_num[0] || - i == r6s->failed_num[1]))) { - pr_debug("Computing stripe %llu block %d\n", - (unsigned long long)sh->sector, i); - compute_block_1(sh, i, 0); - s->uptodate++; - } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { - /* Computing 2-failure is *very* expensive; only - * do it if failed >= 2 - */ - int other; - for (other = disks; other--; ) { - if (other == i) - continue; - if (!test_bit(R5_UPTODATE, - &sh->dev[other].flags)) - break; - } - BUG_ON(other < 0); - pr_debug("Computing stripe %llu blocks %d,%d\n", - (unsigned long long)sh->sector, - i, other); - compute_block_2(sh, i, other); - s->uptodate += 2; - } else if (test_bit(R5_Insync, &dev->flags)) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - pr_debug("Reading block %d (sync=%d)\n", - i, s->syncing); + pr_debug("Computing stripe %llu block %d\n", + (unsigned long long)sh->sector, disk_idx); + set_bit(STRIPE_COMPUTE_RUN, &sh->state); + set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); + set_bit(R5_Wantcompute, &dev->flags); + sh->ops.target = disk_idx; + sh->ops.target2 = -1; /* no 2nd target */ + s->req_compute = 1; + s->uptodate++; + return 1; + } else if (s->uptodate == disks-2 && s->failed >= 2) { + /* Computing 2-failure is *very* expensive; only + * do it if failed >= 2 + */ + int other; + for (other = disks; other--; ) { + if (other == disk_idx) + continue; + if (!test_bit(R5_UPTODATE, + &sh->dev[other].flags)) + break; } + BUG_ON(other < 0); + pr_debug("Computing stripe %llu blocks %d,%d\n", + (unsigned long long)sh->sector, + disk_idx, other); + set_bit(STRIPE_COMPUTE_RUN, &sh->state); + set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); + set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); + set_bit(R5_Wantcompute, &sh->dev[other].flags); + sh->ops.target = disk_idx; + sh->ops.target2 = other; + s->uptodate += 2; + s->req_compute = 1; + return 1; + } else if (test_bit(R5_Insync, &dev->flags)) { + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); + s->locked++; + pr_debug("Reading block %d (sync=%d)\n", + disk_idx, s->syncing); } } + + return 0; +} + +/** + * handle_stripe_fill6 - read or compute data to satisfy pending requests. + */ +static void handle_stripe_fill6(struct stripe_head *sh, + struct stripe_head_state *s, struct r6_state *r6s, + int disks) +{ + int i; + + /* look for blocks to read/compute, skip this if a compute + * is already in flight, or if the stripe contents are in the + * midst of changing due to a write + */ + if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && + !sh->reconstruct_state) + for (i = disks; i--; ) + if (fetch_block6(sh, s, r6s, i, disks)) + break; set_bit(STRIPE_HANDLE, &sh->state); } @@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, */ /* since handle_stripe can be called at any time we need to handle the * case where a compute block operation has been submitted and then a - * subsequent call wants to start a write request. raid5_run_ops only - * handles the case where compute block and postxor are requested + * subsequent call wants to start a write request. raid_run_ops only + * handles the case where compute block and reconstruct are requested * simultaneously. If this is not the case then new writes need to be * held off until the compute completes. */ if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && (s->locked == 0 && (rcw == 0 || rmw == 0) && !test_bit(STRIPE_BIT_DELAY, &sh->state))) - schedule_reconstruction5(sh, s, rcw == 0, 0); + schedule_reconstruction(sh, s, rcw == 0, 0); } static void handle_stripe_dirtying6(raid5_conf_t *conf, struct stripe_head *sh, struct stripe_head_state *s, struct r6_state *r6s, int disks) { - int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; + int rcw = 0, pd_idx = sh->pd_idx, i; int qd_idx = sh->qd_idx; + + set_bit(STRIPE_HANDLE, &sh->state); for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - /* Would I have to read this buffer for reconstruct_write */ - if (!test_bit(R5_OVERWRITE, &dev->flags) - && i != pd_idx && i != qd_idx - && (!test_bit(R5_LOCKED, &dev->flags) - ) && - !test_bit(R5_UPTODATE, &dev->flags)) { - if (test_bit(R5_Insync, &dev->flags)) rcw++; - else { - pr_debug("raid6: must_compute: " - "disk %d flags=%#lx\n", i, dev->flags); - must_compute++; + /* check if we haven't enough data */ + if (!test_bit(R5_OVERWRITE, &dev->flags) && + i != pd_idx && i != qd_idx && + !test_bit(R5_LOCKED, &dev->flags) && + !(test_bit(R5_UPTODATE, &dev->flags) || + test_bit(R5_Wantcompute, &dev->flags))) { + rcw++; + if (!test_bit(R5_Insync, &dev->flags)) + continue; /* it's a failed drive */ + + if ( + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + pr_debug("Read_old stripe %llu " + "block %d for Reconstruct\n", + (unsigned long long)sh->sector, i); + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); + s->locked++; + } else { + pr_debug("Request delayed stripe %llu " + "block %d for Reconstruct\n", + (unsigned long long)sh->sector, i); + set_bit(STRIPE_DELAYED, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); } } } - pr_debug("for sector %llu, rcw=%d, must_compute=%d\n", - (unsigned long long)sh->sector, rcw, must_compute); - set_bit(STRIPE_HANDLE, &sh->state); - - if (rcw > 0) - /* want reconstruct write, but need to get some data */ - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (!test_bit(R5_OVERWRITE, &dev->flags) - && !(s->failed == 0 && (i == pd_idx || i == qd_idx)) - && !test_bit(R5_LOCKED, &dev->flags) && - !test_bit(R5_UPTODATE, &dev->flags) && - test_bit(R5_Insync, &dev->flags)) { - if ( - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - pr_debug("Read_old stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - } else { - pr_debug("Request delayed stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - } - } - } /* now if nothing is locked, and if we have enough data, we can start a * write request */ - if (s->locked == 0 && rcw == 0 && + if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && + s->locked == 0 && rcw == 0 && !test_bit(STRIPE_BIT_DELAY, &sh->state)) { - if (must_compute > 0) { - /* We have failed blocks and need to compute them */ - switch (s->failed) { - case 0: - BUG(); - case 1: - compute_block_1(sh, r6s->failed_num[0], 0); - break; - case 2: - compute_block_2(sh, r6s->failed_num[0], - r6s->failed_num[1]); - break; - default: /* This request should have been failed? */ - BUG(); - } - } - - pr_debug("Computing parity for stripe %llu\n", - (unsigned long long)sh->sector); - compute_parity6(sh, RECONSTRUCT_WRITE); - /* now every locked buffer is ready to be written */ - for (i = disks; i--; ) - if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { - pr_debug("Writing stripe %llu block %d\n", - (unsigned long long)sh->sector, i); - s->locked++; - set_bit(R5_Wantwrite, &sh->dev[i].flags); - } - if (s->locked == disks) - if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) - atomic_inc(&conf->pending_full_writes); - /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ - set_bit(STRIPE_INSYNC, &sh->state); - - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - atomic_dec(&conf->preread_active_stripes); - if (atomic_read(&conf->preread_active_stripes) < - IO_THRESHOLD) - md_wakeup_thread(conf->mddev->thread); - } + schedule_reconstruction(sh, s, 1, 0); } } @@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ - if (sh->ops.zero_sum_result == 0) + if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) /* parity is correct (on disc, * not in buffer any more) */ @@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, set_bit(R5_Wantcompute, &sh->dev[sh->pd_idx].flags); sh->ops.target = sh->pd_idx; + sh->ops.target2 = -1; s->uptodate++; } } @@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, - struct stripe_head_state *s, - struct r6_state *r6s, struct page *tmp_page, - int disks) + struct stripe_head_state *s, + struct r6_state *r6s, int disks) { - int update_p = 0, update_q = 0; - struct r5dev *dev; int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; + struct r5dev *dev; set_bit(STRIPE_HANDLE, &sh->state); BUG_ON(s->failed > 2); - BUG_ON(s->uptodate < disks); + /* Want to check and possibly repair P and Q. * However there could be one 'failed' device, in which * case we can only check one of them, possibly using the * other to generate missing data */ - /* If !tmp_page, we cannot do the calculations, - * but as we have set STRIPE_HANDLE, we will soon be called - * by stripe_handle with a tmp_page - just wait until then. - */ - if (tmp_page) { + switch (sh->check_state) { + case check_state_idle: + /* start a new check operation if there are < 2 failures */ if (s->failed == r6s->q_failed) { - /* The only possible failed device holds 'Q', so it + /* The only possible failed device holds Q, so it * makes sense to check P (If anything else were failed, * we would have used P to recreate it). */ - compute_block_1(sh, pd_idx, 1); - if (!page_is_zero(sh->dev[pd_idx].page)) { - compute_block_1(sh, pd_idx, 0); - update_p = 1; - } + sh->check_state = check_state_run; } if (!r6s->q_failed && s->failed < 2) { - /* q is not failed, and we didn't use it to generate + /* Q is not failed, and we didn't use it to generate * anything, so it makes sense to check it */ - memcpy(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE); - compute_parity6(sh, UPDATE_PARITY); - if (memcmp(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE) != 0) { - clear_bit(STRIPE_INSYNC, &sh->state); - update_q = 1; - } + if (sh->check_state == check_state_run) + sh->check_state = check_state_run_pq; + else + sh->check_state = check_state_run_q; } - if (update_p || update_q) { - conf->mddev->resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) - /* don't try to repair!! */ - update_p = update_q = 0; + + /* discard potentially stale zero_sum_result */ + sh->ops.zero_sum_result = 0; + + if (sh->check_state == check_state_run) { + /* async_xor_zero_sum destroys the contents of P */ + clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); + s->uptodate--; + } + if (sh->check_state >= check_state_run && + sh->check_state <= check_state_run_pq) { + /* async_syndrome_zero_sum preserves P and Q, so + * no need to mark them !uptodate here + */ + set_bit(STRIPE_OP_CHECK, &s->ops_request); + break; } + /* we have 2-disk failure */ + BUG_ON(s->failed != 2); + /* fall through */ + case check_state_compute_result: + sh->check_state = check_state_idle; + + /* check that a write has not made the stripe insync */ + if (test_bit(STRIPE_INSYNC, &sh->state)) + break; + /* now write out any block on a failed drive, - * or P or Q if they need it + * or P or Q if they were recomputed */ - + BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ if (s->failed == 2) { dev = &sh->dev[r6s->failed_num[1]]; s->locked++; @@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); } - - if (update_p) { + if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { dev = &sh->dev[pd_idx]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); } - if (update_q) { + if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { dev = &sh->dev[qd_idx]; s->locked++; set_bit(R5_LOCKED, &dev->flags); @@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, clear_bit(STRIPE_DEGRADED, &sh->state); set_bit(STRIPE_INSYNC, &sh->state); + break; + case check_state_run: + case check_state_run_q: + case check_state_run_pq: + break; /* we will be called again upon completion */ + case check_state_check_result: + sh->check_state = check_state_idle; + + /* handle a successful check operation, if parity is correct + * we are done. Otherwise update the mismatch count and repair + * parity if !MD_RECOVERY_CHECK + */ + if (sh->ops.zero_sum_result == 0) { + /* both parities are correct */ + if (!s->failed) + set_bit(STRIPE_INSYNC, &sh->state); + else { + /* in contrast to the raid5 case we can validate + * parity, but still have a failure to write + * back + */ + sh->check_state = check_state_compute_result; + /* Returning at this point means that we may go + * off and bring p and/or q uptodate again so + * we make sure to check zero_sum_result again + * to verify if p or q need writeback + */ + } + } else { + conf->mddev->resync_mismatches += STRIPE_SECTORS; + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + /* don't try to repair!! */ + set_bit(STRIPE_INSYNC, &sh->state); + else { + int *target = &sh->ops.target; + + sh->ops.target = -1; + sh->ops.target2 = -1; + sh->check_state = check_state_compute_run; + set_bit(STRIPE_COMPUTE_RUN, &sh->state); + set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); + if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { + set_bit(R5_Wantcompute, + &sh->dev[pd_idx].flags); + *target = pd_idx; + target = &sh->ops.target2; + s->uptodate++; + } + if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { + set_bit(R5_Wantcompute, + &sh->dev[qd_idx].flags); + *target = qd_idx; + s->uptodate++; + } + } + } + break; + case check_state_compute_run: + break; + default: + printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n", + __func__, sh->check_state, + (unsigned long long) sh->sector); + BUG(); } } @@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, if (i != sh->pd_idx && i != sh->qd_idx) { int dd_idx, j; struct stripe_head *sh2; + struct async_submit_ctl submit; sector_t bn = compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, @@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } /* place all the copies on one channel */ + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); @@ -2756,7 +2924,8 @@ static bool handle_stripe5(struct stripe_head *sh) rcu_read_lock(); for (i=disks; i--; ) { mdk_rdev_t *rdev; - struct r5dev *dev = &sh->dev[i]; + + dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); pr_debug("check %d: state 0x%lx toread %p read %p write %p " @@ -2973,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh) /* Need to write out all blocks after computing parity */ sh->disks = conf->raid_disks; stripe_set_idx(sh->sector, conf, 0, sh); - schedule_reconstruction5(sh, &s, 1, 1); + schedule_reconstruction(sh, &s, 1, 1); } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); @@ -2993,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh) md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); if (s.ops_request) - raid5_run_ops(sh, s.ops_request); + raid_run_ops(sh, s.ops_request); ops_run_io(sh, &s); @@ -3002,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh) return blocked_rdev == NULL; } -static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) +static bool handle_stripe6(struct stripe_head *sh) { raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks; @@ -3014,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) mdk_rdev_t *blocked_rdev = NULL; pr_debug("handling stripe %llu, state=%#lx cnt=%d, " - "pd_idx=%d, qd_idx=%d\n", + "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", (unsigned long long)sh->sector, sh->state, - atomic_read(&sh->count), pd_idx, qd_idx); + atomic_read(&sh->count), pd_idx, qd_idx, + sh->check_state, sh->reconstruct_state); memset(&s, 0, sizeof(s)); spin_lock(&sh->lock); @@ -3036,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) pr_debug("check %d: state 0x%lx read %p write %p written %p\n", i, dev->flags, dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { - struct bio *rbi, *rbi2; - pr_debug("Return read for disc %d\n", i); - spin_lock_irq(&conf->device_lock); - rbi = dev->toread; - dev->toread = NULL; - if (test_and_clear_bit(R5_Overlap, &dev->flags)) - wake_up(&conf->wait_for_overlap); - spin_unlock_irq(&conf->device_lock); - while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { - copy_data(0, rbi, dev->page, dev->sector); - rbi2 = r5_next_bio(rbi, dev->sector); - spin_lock_irq(&conf->device_lock); - if (!raid5_dec_bi_phys_segments(rbi)) { - rbi->bi_next = return_bi; - return_bi = rbi; - } - spin_unlock_irq(&conf->device_lock); - rbi = rbi2; - } - } + /* maybe we can reply to a read + * + * new wantfill requests are only permitted while + * ops_complete_biofill is guaranteed to be inactive + */ + if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && + !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) + set_bit(R5_Wantfill, &dev->flags); /* now count some things */ if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; + if (test_bit(R5_Wantcompute, &dev->flags)) { + s.compute++; + BUG_ON(s.compute > 2); + } - - if (dev->toread) + if (test_bit(R5_Wantfill, &dev->flags)) { + s.to_fill++; + } else if (dev->toread) s.to_read++; if (dev->towrite) { s.to_write++; @@ -3105,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) blocked_rdev = NULL; } + if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { + set_bit(STRIPE_OP_BIOFILL, &s.ops_request); + set_bit(STRIPE_BIOFILL_RUN, &sh->state); + } + pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, @@ -3145,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) * or to load a block that is being partially written. */ if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || - (s.syncing && (s.uptodate < disks)) || s.expanding) + (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) handle_stripe_fill6(sh, &s, &r6s, disks); - /* now to consider writing and what else, if anything should be read */ - if (s.to_write) + /* Now we check to see if any write operations have recently + * completed + */ + if (sh->reconstruct_state == reconstruct_state_drain_result) { + int qd_idx = sh->qd_idx; + + sh->reconstruct_state = reconstruct_state_idle; + /* All the 'written' buffers and the parity blocks are ready to + * be written back to disk + */ + BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); + BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags)); + for (i = disks; i--; ) { + dev = &sh->dev[i]; + if (test_bit(R5_LOCKED, &dev->flags) && + (i == sh->pd_idx || i == qd_idx || + dev->written)) { + pr_debug("Writing block %d\n", i); + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); + set_bit(R5_Wantwrite, &dev->flags); + if (!test_bit(R5_Insync, &dev->flags) || + ((i == sh->pd_idx || i == qd_idx) && + s.failed == 0)) + set_bit(STRIPE_INSYNC, &sh->state); + } + } + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + atomic_dec(&conf->preread_active_stripes); + if (atomic_read(&conf->preread_active_stripes) < + IO_THRESHOLD) + md_wakeup_thread(conf->mddev->thread); + } + } + + /* Now to consider new write requests and what else, if anything + * should be read. We do not handle new writes when: + * 1/ A 'write' operation (copy+gen_syndrome) is already in flight. + * 2/ A 'check' operation is in flight, as it may clobber the parity + * block. + */ + if (s.to_write && !sh->reconstruct_state && !sh->check_state) handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); /* maybe we need to check and possibly fix the parity for this stripe * Any reads will already have been scheduled, so we just see if enough - * data is available + * data is available. The parity check is held off while parity + * dependent operations are in flight. */ - if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) - handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); + if (sh->check_state || + (s.syncing && s.locked == 0 && + !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && + !test_bit(STRIPE_INSYNC, &sh->state))) + handle_parity_checks6(conf, sh, &s, &r6s, disks); if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS,1); @@ -3178,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) set_bit(R5_Wantwrite, &dev->flags); set_bit(R5_ReWrite, &dev->flags); set_bit(R5_LOCKED, &dev->flags); + s.locked++; } else { /* let's read it back */ set_bit(R5_Wantread, &dev->flags); set_bit(R5_LOCKED, &dev->flags); + s.locked++; } } } - if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { + /* Finish reconstruct operations initiated by the expansion process */ + if (sh->reconstruct_state == reconstruct_state_result) { + sh->reconstruct_state = reconstruct_state_idle; + clear_bit(STRIPE_EXPANDING, &sh->state); + for (i = conf->raid_disks; i--; ) { + set_bit(R5_Wantwrite, &sh->dev[i].flags); + set_bit(R5_LOCKED, &sh->dev[i].flags); + s.locked++; + } + } + + if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && + !sh->reconstruct_state) { struct stripe_head *sh2 = get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { @@ -3207,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) /* Need to write out all blocks after computing P&Q */ sh->disks = conf->raid_disks; stripe_set_idx(sh->sector, conf, 0, sh); - compute_parity6(sh, RECONSTRUCT_WRITE); - for (i = conf->raid_disks ; i-- ; ) { - set_bit(R5_LOCKED, &sh->dev[i].flags); - s.locked++; - set_bit(R5_Wantwrite, &sh->dev[i].flags); - } - clear_bit(STRIPE_EXPANDING, &sh->state); - } else if (s.expanded) { + schedule_reconstruction(sh, &s, 1, 1); + } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); wake_up(&conf->wait_for_overlap); @@ -3232,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (unlikely(blocked_rdev)) md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); + if (s.ops_request) + raid_run_ops(sh, s.ops_request); + ops_run_io(sh, &s); return_io(return_bi); @@ -3240,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) } /* returns true if the stripe was handled */ -static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) +static bool handle_stripe(struct stripe_head *sh) { if (sh->raid_conf->level == 6) - return handle_stripe6(sh, tmp_page); + return handle_stripe6(sh); else return handle_stripe5(sh); } - - static void raid5_activate_delayed(raid5_conf_t *conf) { if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { @@ -3331,6 +3549,9 @@ static int raid5_congested(void *data, int bits) /* No difference between reads and writes. Just check * how busy the stripe_cache is */ + + if (mddev_congested(mddev, bits)) + return 1; if (conf->inactive_blocked) return 1; if (conf->quiesce) @@ -3880,7 +4101,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped INIT_LIST_HEAD(&stripes); for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { int j; - int skipped = 0; + int skipped_disk = 0; sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); set_bit(STRIPE_EXPANDING, &sh->state); atomic_inc(&conf->reshape_stripes); @@ -3896,14 +4117,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped continue; s = compute_blocknr(sh, j, 0); if (s < raid5_size(mddev, 0, 0)) { - skipped = 1; + skipped_disk = 1; continue; } memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE); set_bit(R5_Expanded, &sh->dev[j].flags); set_bit(R5_UPTODATE, &sh->dev[j].flags); } - if (!skipped) { + if (!skipped_disk) { set_bit(STRIPE_EXPAND_READY, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); } @@ -4057,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski spin_unlock(&sh->lock); /* wait for any blocked device to be handled */ - while(unlikely(!handle_stripe(sh, NULL))) + while (unlikely(!handle_stripe(sh))) ; release_stripe(sh); @@ -4114,7 +4335,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) return handled; } - handle_stripe(sh, NULL); + handle_stripe(sh); release_stripe(sh); handled++; } @@ -4128,6 +4349,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) return handled; } +#ifdef CONFIG_MULTICORE_RAID456 +static void __process_stripe(void *param, async_cookie_t cookie) +{ + struct stripe_head *sh = param; + + handle_stripe(sh); + release_stripe(sh); +} + +static void process_stripe(struct stripe_head *sh, struct list_head *domain) +{ + async_schedule_domain(__process_stripe, sh, domain); +} + +static void synchronize_stripe_processing(struct list_head *domain) +{ + async_synchronize_full_domain(domain); +} +#else +static void process_stripe(struct stripe_head *sh, struct list_head *domain) +{ + handle_stripe(sh); + release_stripe(sh); + cond_resched(); +} + +static void synchronize_stripe_processing(struct list_head *domain) +{ +} +#endif /* @@ -4142,6 +4393,7 @@ static void raid5d(mddev_t *mddev) struct stripe_head *sh; raid5_conf_t *conf = mddev->private; int handled; + LIST_HEAD(raid_domain); pr_debug("+++ raid5d active\n"); @@ -4178,8 +4430,7 @@ static void raid5d(mddev_t *mddev) spin_unlock_irq(&conf->device_lock); handled++; - handle_stripe(sh, conf->spare_page); - release_stripe(sh); + process_stripe(sh, &raid_domain); spin_lock_irq(&conf->device_lock); } @@ -4187,6 +4438,7 @@ static void raid5d(mddev_t *mddev) spin_unlock_irq(&conf->device_lock); + synchronize_stripe_processing(&raid_domain); async_tx_issue_pending_all(); unplug_slaves(mddev); @@ -4319,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) return sectors * (raid_disks - conf->max_degraded); } +static void raid5_free_percpu(raid5_conf_t *conf) +{ + struct raid5_percpu *percpu; + unsigned long cpu; + + if (!conf->percpu) + return; + + get_online_cpus(); + for_each_possible_cpu(cpu) { + percpu = per_cpu_ptr(conf->percpu, cpu); + safe_put_page(percpu->spare_page); + kfree(percpu->scribble); + } +#ifdef CONFIG_HOTPLUG_CPU + unregister_cpu_notifier(&conf->cpu_notify); +#endif + put_online_cpus(); + + free_percpu(conf->percpu); +} + static void free_conf(raid5_conf_t *conf) { shrink_stripes(conf); - safe_put_page(conf->spare_page); + raid5_free_percpu(conf); kfree(conf->disks); kfree(conf->stripe_hashtbl); kfree(conf); } +#ifdef CONFIG_HOTPLUG_CPU +static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify); + long cpu = (long)hcpu; + struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + if (conf->level == 6 && !percpu->spare_page) + percpu->spare_page = alloc_page(GFP_KERNEL); + if (!percpu->scribble) + percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL); + + if (!percpu->scribble || + (conf->level == 6 && !percpu->spare_page)) { + safe_put_page(percpu->spare_page); + kfree(percpu->scribble); + pr_err("%s: failed memory allocation for cpu%ld\n", + __func__, cpu); + return NOTIFY_BAD; + } + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + safe_put_page(percpu->spare_page); + kfree(percpu->scribble); + percpu->spare_page = NULL; + percpu->scribble = NULL; + break; + default: + break; + } + return NOTIFY_OK; +} +#endif + +static int raid5_alloc_percpu(raid5_conf_t *conf) +{ + unsigned long cpu; + struct page *spare_page; + struct raid5_percpu *allcpus; + void *scribble; + int err; + + allcpus = alloc_percpu(struct raid5_percpu); + if (!allcpus) + return -ENOMEM; + conf->percpu = allcpus; + + get_online_cpus(); + err = 0; + for_each_present_cpu(cpu) { + if (conf->level == 6) { + spare_page = alloc_page(GFP_KERNEL); + if (!spare_page) { + err = -ENOMEM; + break; + } + per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page; + } + scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL); + if (!scribble) { + err = -ENOMEM; + break; + } + per_cpu_ptr(conf->percpu, cpu)->scribble = scribble; + } +#ifdef CONFIG_HOTPLUG_CPU + conf->cpu_notify.notifier_call = raid456_cpu_notify; + conf->cpu_notify.priority = 0; + if (err == 0) + err = register_cpu_notifier(&conf->cpu_notify); +#endif + put_online_cpus(); + + return err; +} + static raid5_conf_t *setup_conf(mddev_t *mddev) { raid5_conf_t *conf; @@ -4369,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) goto abort; conf->raid_disks = mddev->raid_disks; + conf->scribble_len = scribble_len(conf->raid_disks); if (mddev->reshape_position == MaxSector) conf->previous_raid_disks = mddev->raid_disks; else @@ -4384,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) goto abort; - if (mddev->new_level == 6) { - conf->spare_page = alloc_page(GFP_KERNEL); - if (!conf->spare_page) - goto abort; - } + conf->level = mddev->new_level; + if (raid5_alloc_percpu(conf) != 0) + goto abort; + spin_lock_init(&conf->device_lock); init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); @@ -4447,7 +4802,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) printk(KERN_INFO "raid5: allocated %dkB for %s\n", memory, mdname(mddev)); - conf->thread = md_register_thread(raid5d, mddev, "%s_raid5"); + conf->thread = md_register_thread(raid5d, mddev, NULL); if (!conf->thread) { printk(KERN_ERR "raid5: couldn't allocate thread for %s\n", @@ -4613,7 +4968,7 @@ static int run(mddev_t *mddev) set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_reshape"); + "reshape"); } /* read-ahead size must cover two whole stripes, which is @@ -5031,7 +5386,7 @@ static int raid5_start_reshape(mddev_t *mddev) set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "%s_reshape"); + "reshape"); if (!mddev->sync_thread) { mddev->recovery = 0; spin_lock_irq(&conf->device_lock); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 9459689c4ea..2390e0e83da 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -2,6 +2,7 @@ #define _RAID5_H #include <linux/raid/xor.h> +#include <linux/dmaengine.h> /* * @@ -175,7 +176,9 @@ */ enum check_states { check_state_idle = 0, - check_state_run, /* parity check */ + check_state_run, /* xor parity check */ + check_state_run_q, /* q-parity check */ + check_state_run_pq, /* pq dual parity check */ check_state_check_result, check_state_compute_run, /* parity repair */ check_state_compute_result, @@ -215,8 +218,8 @@ struct stripe_head { * @target - STRIPE_OP_COMPUTE_BLK target */ struct stripe_operations { - int target; - u32 zero_sum_result; + int target, target2; + enum sum_check_flags zero_sum_result; } ops; struct r5dev { struct bio req; @@ -298,7 +301,7 @@ struct r6_state { #define STRIPE_OP_COMPUTE_BLK 1 #define STRIPE_OP_PREXOR 2 #define STRIPE_OP_BIODRAIN 3 -#define STRIPE_OP_POSTXOR 4 +#define STRIPE_OP_RECONSTRUCT 4 #define STRIPE_OP_CHECK 5 /* @@ -385,8 +388,21 @@ struct raid5_private_data { * (fresh device added). * Cleared when a sync completes. */ - - struct page *spare_page; /* Used when checking P/Q in raid6 */ + /* per cpu variables */ + struct raid5_percpu { + struct page *spare_page; /* Used when checking P/Q in raid6 */ + void *scribble; /* space for constructing buffer + * lists and performing address + * conversions + */ + } *percpu; + size_t scribble_len; /* size of scribble region must be + * associated with conf to handle + * cpu hotplug while reshaping + */ +#ifdef CONFIG_HOTPLUG_CPU + struct notifier_block cpu_notify; +#endif /* * Free stripes pool diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h index 895e2efca8a..01fc7048474 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.h +++ b/drivers/media/dvb/dvb-core/dvbdev.h @@ -31,10 +31,9 @@ #define DVB_MAJOR 212 #if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0 -#define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS + #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS #else -#warning invalid CONFIG_DVB_MAX_ADAPTERS value -#define DVB_MAX_ADAPTERS 8 + #define DVB_MAX_ADAPTERS 8 #endif #define DVB_UNSET (-1) diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig index 0e4b97fba38..9744b069241 100644 --- a/drivers/media/dvb/dvb-usb/Kconfig +++ b/drivers/media/dvb/dvb-usb/Kconfig @@ -75,7 +75,7 @@ config DVB_USB_DIB0700 select DVB_DIB3000MC if !DVB_FE_CUSTOMISE select DVB_S5H1411 if !DVB_FE_CUSTOMISE select DVB_LGDT3305 if !DVB_FE_CUSTOMISE - select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE + select DVB_TUNER_DIB0070 select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE diff --git a/drivers/media/video/saa7164/saa7164-api.c b/drivers/media/video/saa7164/saa7164-api.c index bb6df1b276b..6f094a96ac8 100644 --- a/drivers/media/video/saa7164/saa7164-api.c +++ b/drivers/media/video/saa7164/saa7164-api.c @@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev) goto out; } - if (debug & DBGLVL_API) + if (saa_debug & DBGLVL_API) saa7164_dumphex16(dev, buf, (buflen/16)*16); saa7164_api_dump_subdevs(dev, buf, buflen); @@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg, dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len); - if (debug & DBGLVL_I2C) + if (saa_debug & DBGLVL_I2C) saa7164_dumphex16(dev, buf, 2 * 16); ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR, @@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg, if (ret != SAA_OK) printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret); else { - if (debug & DBGLVL_I2C) + if (saa_debug & DBGLVL_I2C) saa7164_dumphex16(dev, buf, sizeof(buf)); memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen); } @@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen, *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen; memcpy((buf + 2 * sizeof(u32)), data, datalen); - if (debug & DBGLVL_I2C) + if (saa_debug & DBGLVL_I2C) saa7164_dumphex16(dev, buf, sizeof(buf)); ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR, diff --git a/drivers/media/video/saa7164/saa7164-cmd.c b/drivers/media/video/saa7164/saa7164-cmd.c index e097f1a0969..c45966edc0c 100644 --- a/drivers/media/video/saa7164/saa7164-cmd.c +++ b/drivers/media/video/saa7164/saa7164-cmd.c @@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno) unsigned long stamp; int r; - if (debug >= 4) + if (saa_debug >= 4) saa7164_bus_dump(dev); dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno); diff --git a/drivers/media/video/saa7164/saa7164-core.c b/drivers/media/video/saa7164/saa7164-core.c index f0dbead188c..709affc3104 100644 --- a/drivers/media/video/saa7164/saa7164-core.c +++ b/drivers/media/video/saa7164/saa7164-core.c @@ -45,8 +45,8 @@ MODULE_LICENSE("GPL"); 32 bus */ -unsigned int debug; -module_param(debug, int, 0644); +unsigned int saa_debug; +module_param_named(debug, saa_debug, int, 0644); MODULE_PARM_DESC(debug, "enable debug messages"); unsigned int waitsecs = 10; @@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev, printk(KERN_ERR "%s() Unsupported board detected, " "registering without firmware\n", __func__); - dprintk(1, "%s() parameter debug = %d\n", __func__, debug); + dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug); dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs); fail_fw: diff --git a/drivers/media/video/saa7164/saa7164.h b/drivers/media/video/saa7164/saa7164.h index 6753008a9c9..42660b546f0 100644 --- a/drivers/media/video/saa7164/saa7164.h +++ b/drivers/media/video/saa7164/saa7164.h @@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port, /* ----------------------------------------------------------- */ -extern unsigned int debug; +extern unsigned int saa_debug; #define dprintk(level, fmt, arg...)\ - do { if (debug & level)\ + do { if (saa_debug & level)\ printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\ } while (0) diff --git a/drivers/media/video/usbvision/usbvision-core.c b/drivers/media/video/usbvision/usbvision-core.c index 6ba16abeebd..e0f91e4ab65 100644 --- a/drivers/media/video/usbvision/usbvision-core.c +++ b/drivers/media/video/usbvision/usbvision-core.c @@ -28,7 +28,6 @@ #include <linux/timer.h> #include <linux/slab.h> #include <linux/mm.h> -#include <linux/utsname.h> #include <linux/highmem.h> #include <linux/vmalloc.h> #include <linux/module.h> diff --git a/drivers/media/video/usbvision/usbvision-i2c.c b/drivers/media/video/usbvision/usbvision-i2c.c index f97fd06d594..c19f51dba2e 100644 --- a/drivers/media/video/usbvision/usbvision-i2c.c +++ b/drivers/media/video/usbvision/usbvision-i2c.c @@ -28,7 +28,6 @@ #include <linux/module.h> #include <linux/delay.h> #include <linux/slab.h> -#include <linux/utsname.h> #include <linux/init.h> #include <asm/uaccess.h> #include <linux/ioport.h> diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c index 90d9b5c0e9a..a2a50d608a3 100644 --- a/drivers/media/video/usbvision/usbvision-video.c +++ b/drivers/media/video/usbvision/usbvision-video.c @@ -52,7 +52,6 @@ #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/mm.h> -#include <linux/utsname.h> #include <linux/highmem.h> #include <linux/vmalloc.h> #include <linux/module.h> diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index a5b448ea4ea..b3bf1c44d74 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -339,9 +339,9 @@ static int h_memstick_read_dev_id(struct memstick_dev *card, card->id.type = id_reg.type; card->id.category = id_reg.category; card->id.class = id_reg.class; + dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode); } complete(&card->mrq_complete); - dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode); return -EAGAIN; } } diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 79689b10f93..766e21e1557 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -937,6 +937,8 @@ static int quicktest1(unsigned long arg) /* Need 1K cacheline aligned that does not cross page boundary */ p = kmalloc(4096, 0); + if (p == NULL) + return -ENOMEM; mq = ALIGNUP(p, 1024); memset(mes, 0xee, sizeof(mes)); dw = mq; diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 9cbf95bedce..ccd4408a26c 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -340,10 +340,9 @@ static struct proc_dir_entry *proc_gru __read_mostly; static int create_proc_file(struct proc_entry *p) { - p->entry = create_proc_entry(p->name, p->mode, proc_gru); + p->entry = proc_create(p->name, p->mode, proc_gru, p->fops); if (!p->entry) return -1; - p->entry->proc_fops = p->fops; return 0; } diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 065fa818be5..fc25586b7ee 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -599,6 +599,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) struct scatterlist *sg; unsigned int i; enum dma_data_direction direction; + unsigned int sglen; /* * We don't do DMA on "complex" transfers, i.e. with @@ -628,11 +629,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) else direction = DMA_TO_DEVICE; + sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction); + if (sglen != data->sg_len) + goto unmap_exit; desc = chan->device->device_prep_slave_sg(chan, data->sg, data->sg_len, direction, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc) - return -ENOMEM; + goto unmap_exit; host->dma.data_desc = desc; desc->callback = atmci_dma_complete; @@ -643,6 +647,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) chan->device->device_issue_pending(chan); return 0; +unmap_exit: + dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction); + return -ENOMEM; } #else /* CONFIG_MMC_ATMELMCI_DMA */ diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index e4ec3659759..ecf90f5c97c 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -159,7 +159,7 @@ config MTD_AFS_PARTS config MTD_OF_PARTS tristate "Flash partition map based on OF description" - depends on PPC_OF && MTD_PARTITIONS + depends on (MICROBLAZE || PPC_OF) && MTD_PARTITIONS help This provides a partition parsing function which derives the partition map from the children of the flash node, diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 3a9a960644b..841e085ab74 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -74,7 +74,7 @@ config MTD_PHYSMAP_BANKWIDTH config MTD_PHYSMAP_OF tristate "Flash device in physical memory map based on OF description" - depends on PPC_OF && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM) + depends on (MICROBLAZE || PPC_OF) && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM) help This provides a 'mapping' driver which allows the NOR Flash and ROM driver code to communicate with chips which are mapped diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 7adff4d0960..b9eeadf01b7 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -813,10 +813,10 @@ static int vortex_suspend(struct pci_dev *pdev, pm_message_t state) if (netif_running(dev)) { netif_device_detach(dev); vortex_down(dev, 1); + disable_irq(dev->irq); } pci_save_state(pdev); pci_enable_wake(pdev, pci_choose_state(pdev, state), 0); - free_irq(dev->irq, dev); pci_disable_device(pdev); pci_set_power_state(pdev, pci_choose_state(pdev, state)); } @@ -839,18 +839,12 @@ static int vortex_resume(struct pci_dev *pdev) return err; } pci_set_master(pdev); - if (request_irq(dev->irq, vp->full_bus_master_rx ? - &boomerang_interrupt : &vortex_interrupt, IRQF_SHARED, dev->name, dev)) { - pr_warning("%s: Could not reserve IRQ %d\n", dev->name, dev->irq); - pci_disable_device(pdev); - return -EBUSY; - } if (netif_running(dev)) { err = vortex_up(dev); if (err) return err; - else - netif_device_attach(dev); + enable_irq(dev->irq); + netif_device_attach(dev); } } return 0; diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 462d9f59c53..83a1922e68e 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -87,7 +87,7 @@ /* These identify the driver base version and may not be removed. */ static char version[] = -KERN_INFO DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; +DRV_NAME ": 10/100 PCI Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; MODULE_AUTHOR("Jeff Garzik <jgarzik@pobox.com>"); MODULE_DESCRIPTION("RealTek RTL-8139C+ series 10/100 PCI Ethernet driver"); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ed5741b2e70..2bea67c134f 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1875,7 +1875,7 @@ config 68360_ENET config FEC bool "FEC ethernet controller (of ColdFire and some i.MX CPUs)" - depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35 + depends on M523x || M527x || M5272 || M528x || M520x || M532x || MACH_MX27 || ARCH_MX35 || ARCH_MX25 help Say Y here if you want to use the built-in 10/100 Fast ethernet controller on some Motorola ColdFire and Freescale i.MX processors. diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index be2c6cfe6e8..1372e9a99f5 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -2296,7 +2296,7 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state) u32 ctrl; u32 mac_ctrl_data; u32 master_ctrl_data; - u32 wol_ctrl_data; + u32 wol_ctrl_data = 0; u16 mii_bmsr_data; u16 save_autoneg_advertised; u16 mii_intr_status_data; diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 09007437246..df32c109b7a 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -75,6 +75,13 @@ config CAN_EMS_PCI CPC-PCIe and CPC-104P cards from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de). +config CAN_EMS_USB + tristate "EMS CPC-USB/ARM7 CAN/USB interface" + depends on USB && CAN_DEV + ---help--- + This driver is for the one channel CPC-USB/ARM7 CAN/USB interface + from from EMS Dr. Thomas Wuensche (http://www.ems-wuensche.de). + config CAN_KVASER_PCI tristate "Kvaser PCIcanx and Kvaser PCIcan PCI Cards" depends on PCI && CAN_SJA1000 @@ -82,6 +89,12 @@ config CAN_KVASER_PCI This driver is for the the PCIcanx and PCIcan cards (1, 2 or 4 channel) from Kvaser (http://www.kvaser.com). +config CAN_AT91 + tristate "Atmel AT91 onchip CAN controller" + depends on CAN && CAN_DEV && ARCH_AT91SAM9263 + ---help--- + This is a driver for the SoC CAN controller in Atmel's AT91SAM9263. + config CAN_DEBUG_DEVICES bool "CAN devices debugging messages" depends on CAN diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 523a941b358..0dea62721f2 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -7,6 +7,9 @@ obj-$(CONFIG_CAN_VCAN) += vcan.o obj-$(CONFIG_CAN_DEV) += can-dev.o can-dev-y := dev.o +obj-y += usb/ + obj-$(CONFIG_CAN_SJA1000) += sja1000/ +obj-$(CONFIG_CAN_AT91) += at91_can.o ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG diff --git a/drivers/net/can/sja1000/ems_pci.c b/drivers/net/can/sja1000/ems_pci.c index 7d84b8ac9c1..fd04789d337 100644 --- a/drivers/net/can/sja1000/ems_pci.c +++ b/drivers/net/can/sja1000/ems_pci.c @@ -94,12 +94,14 @@ struct ems_pci_card { #define EMS_PCI_CDR (CDR_CBP | CDR_CLKOUT_MASK) #define EMS_PCI_V1_BASE_BAR 1 -#define EMS_PCI_V1_MEM_SIZE 4096 +#define EMS_PCI_V1_CONF_SIZE 4096 /* size of PITA control area */ #define EMS_PCI_V2_BASE_BAR 2 -#define EMS_PCI_V2_MEM_SIZE 128 +#define EMS_PCI_V2_CONF_SIZE 128 /* size of PLX control area */ #define EMS_PCI_CAN_BASE_OFFSET 0x400 /* offset where the controllers starts */ #define EMS_PCI_CAN_CTRL_SIZE 0x200 /* memory size for each controller */ +#define EMS_PCI_BASE_SIZE 4096 /* size of controller area */ + static struct pci_device_id ems_pci_tbl[] = { /* CPC-PCI v1 */ {PCI_VENDOR_ID_SIEMENS, 0x2104, PCI_ANY_ID, PCI_ANY_ID,}, @@ -224,7 +226,7 @@ static int __devinit ems_pci_add_card(struct pci_dev *pdev, struct sja1000_priv *priv; struct net_device *dev; struct ems_pci_card *card; - int max_chan, mem_size, base_bar; + int max_chan, conf_size, base_bar; int err, i; /* Enabling PCI device */ @@ -251,22 +253,22 @@ static int __devinit ems_pci_add_card(struct pci_dev *pdev, card->version = 2; /* CPC-PCI v2 */ max_chan = EMS_PCI_V2_MAX_CHAN; base_bar = EMS_PCI_V2_BASE_BAR; - mem_size = EMS_PCI_V2_MEM_SIZE; + conf_size = EMS_PCI_V2_CONF_SIZE; } else { card->version = 1; /* CPC-PCI v1 */ max_chan = EMS_PCI_V1_MAX_CHAN; base_bar = EMS_PCI_V1_BASE_BAR; - mem_size = EMS_PCI_V1_MEM_SIZE; + conf_size = EMS_PCI_V1_CONF_SIZE; } /* Remap configuration space and controller memory area */ - card->conf_addr = pci_iomap(pdev, 0, mem_size); + card->conf_addr = pci_iomap(pdev, 0, conf_size); if (card->conf_addr == NULL) { err = -ENOMEM; goto failure_cleanup; } - card->base_addr = pci_iomap(pdev, base_bar, mem_size); + card->base_addr = pci_iomap(pdev, base_bar, EMS_PCI_BASE_SIZE); if (card->base_addr == NULL) { err = -ENOMEM; goto failure_cleanup; diff --git a/drivers/net/can/usb/Makefile b/drivers/net/can/usb/Makefile new file mode 100644 index 00000000000..c3f75ba701b --- /dev/null +++ b/drivers/net/can/usb/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Linux Controller Area Network USB drivers. +# + +obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c new file mode 100644 index 00000000000..9012e0abc62 --- /dev/null +++ b/drivers/net/can/usb/ems_usb.c @@ -0,0 +1,1155 @@ +/* + * CAN driver for EMS Dr. Thomas Wuensche CPC-USB/ARM7 + * + * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#include <linux/init.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/usb.h> + +#include <linux/can.h> +#include <linux/can/dev.h> +#include <linux/can/error.h> + +MODULE_AUTHOR("Sebastian Haas <haas@ems-wuensche.com>"); +MODULE_DESCRIPTION("CAN driver for EMS Dr. Thomas Wuensche CAN/USB interfaces"); +MODULE_LICENSE("GPL v2"); + +/* Control-Values for CPC_Control() Command Subject Selection */ +#define CONTR_CAN_MESSAGE 0x04 +#define CONTR_CAN_STATE 0x0C +#define CONTR_BUS_ERROR 0x1C + +/* Control Command Actions */ +#define CONTR_CONT_OFF 0 +#define CONTR_CONT_ON 1 +#define CONTR_ONCE 2 + +/* Messages from CPC to PC */ +#define CPC_MSG_TYPE_CAN_FRAME 1 /* CAN data frame */ +#define CPC_MSG_TYPE_RTR_FRAME 8 /* CAN remote frame */ +#define CPC_MSG_TYPE_CAN_PARAMS 12 /* Actual CAN parameters */ +#define CPC_MSG_TYPE_CAN_STATE 14 /* CAN state message */ +#define CPC_MSG_TYPE_EXT_CAN_FRAME 16 /* Extended CAN data frame */ +#define CPC_MSG_TYPE_EXT_RTR_FRAME 17 /* Extended remote frame */ +#define CPC_MSG_TYPE_CONTROL 19 /* change interface behavior */ +#define CPC_MSG_TYPE_CONFIRM 20 /* command processed confirmation */ +#define CPC_MSG_TYPE_OVERRUN 21 /* overrun events */ +#define CPC_MSG_TYPE_CAN_FRAME_ERROR 23 /* detected bus errors */ +#define CPC_MSG_TYPE_ERR_COUNTER 25 /* RX/TX error counter */ + +/* Messages from the PC to the CPC interface */ +#define CPC_CMD_TYPE_CAN_FRAME 1 /* CAN data frame */ +#define CPC_CMD_TYPE_CONTROL 3 /* control of interface behavior */ +#define CPC_CMD_TYPE_CAN_PARAMS 6 /* set CAN parameters */ +#define CPC_CMD_TYPE_RTR_FRAME 13 /* CAN remote frame */ +#define CPC_CMD_TYPE_CAN_STATE 14 /* CAN state message */ +#define CPC_CMD_TYPE_EXT_CAN_FRAME 15 /* Extended CAN data frame */ +#define CPC_CMD_TYPE_EXT_RTR_FRAME 16 /* Extended CAN remote frame */ +#define CPC_CMD_TYPE_CAN_EXIT 200 /* exit the CAN */ + +#define CPC_CMD_TYPE_INQ_ERR_COUNTER 25 /* request the CAN error counters */ +#define CPC_CMD_TYPE_CLEAR_MSG_QUEUE 8 /* clear CPC_MSG queue */ +#define CPC_CMD_TYPE_CLEAR_CMD_QUEUE 28 /* clear CPC_CMD queue */ + +#define CPC_CC_TYPE_SJA1000 2 /* Philips basic CAN controller */ + +#define CPC_CAN_ECODE_ERRFRAME 0x01 /* Ecode type */ + +/* Overrun types */ +#define CPC_OVR_EVENT_CAN 0x01 +#define CPC_OVR_EVENT_CANSTATE 0x02 +#define CPC_OVR_EVENT_BUSERROR 0x04 + +/* + * If the CAN controller lost a message we indicate it with the highest bit + * set in the count field. + */ +#define CPC_OVR_HW 0x80 + +/* Size of the "struct ems_cpc_msg" without the union */ +#define CPC_MSG_HEADER_LEN 11 +#define CPC_CAN_MSG_MIN_SIZE 5 + +/* Define these values to match your devices */ +#define USB_CPCUSB_VENDOR_ID 0x12D6 + +#define USB_CPCUSB_ARM7_PRODUCT_ID 0x0444 + +/* Mode register NXP LPC2119/SJA1000 CAN Controller */ +#define SJA1000_MOD_NORMAL 0x00 +#define SJA1000_MOD_RM 0x01 + +/* ECC register NXP LPC2119/SJA1000 CAN Controller */ +#define SJA1000_ECC_SEG 0x1F +#define SJA1000_ECC_DIR 0x20 +#define SJA1000_ECC_ERR 0x06 +#define SJA1000_ECC_BIT 0x00 +#define SJA1000_ECC_FORM 0x40 +#define SJA1000_ECC_STUFF 0x80 +#define SJA1000_ECC_MASK 0xc0 + +/* Status register content */ +#define SJA1000_SR_BS 0x80 +#define SJA1000_SR_ES 0x40 + +#define SJA1000_DEFAULT_OUTPUT_CONTROL 0xDA + +/* + * The device actually uses a 16MHz clock to generate the CAN clock + * but it expects SJA1000 bit settings based on 8MHz (is internally + * converted). + */ +#define EMS_USB_ARM7_CLOCK 8000000 + +/* + * CAN-Message representation in a CPC_MSG. Message object type is + * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or + * CPC_MSG_TYPE_EXT_CAN_FRAME or CPC_MSG_TYPE_EXT_RTR_FRAME. + */ +struct cpc_can_msg { + u32 id; + u8 length; + u8 msg[8]; +}; + +/* Representation of the CAN parameters for the SJA1000 controller */ +struct cpc_sja1000_params { + u8 mode; + u8 acc_code0; + u8 acc_code1; + u8 acc_code2; + u8 acc_code3; + u8 acc_mask0; + u8 acc_mask1; + u8 acc_mask2; + u8 acc_mask3; + u8 btr0; + u8 btr1; + u8 outp_contr; +}; + +/* CAN params message representation */ +struct cpc_can_params { + u8 cc_type; + + /* Will support M16C CAN controller in the future */ + union { + struct cpc_sja1000_params sja1000; + } cc_params; +}; + +/* Structure for confirmed message handling */ +struct cpc_confirm { + u8 error; /* error code */ +}; + +/* Structure for overrun conditions */ +struct cpc_overrun { + u8 event; + u8 count; +}; + +/* SJA1000 CAN errors (compatible to NXP LPC2119) */ +struct cpc_sja1000_can_error { + u8 ecc; + u8 rxerr; + u8 txerr; +}; + +/* structure for CAN error conditions */ +struct cpc_can_error { + u8 ecode; + + struct { + u8 cc_type; + + /* Other controllers may also provide error code capture regs */ + union { + struct cpc_sja1000_can_error sja1000; + } regs; + } cc; +}; + +/* + * Structure containing RX/TX error counter. This structure is used to request + * the values of the CAN controllers TX and RX error counter. + */ +struct cpc_can_err_counter { + u8 rx; + u8 tx; +}; + +/* Main message type used between library and application */ +struct __attribute__ ((packed)) ems_cpc_msg { + u8 type; /* type of message */ + u8 length; /* length of data within union 'msg' */ + u8 msgid; /* confirmation handle */ + u32 ts_sec; /* timestamp in seconds */ + u32 ts_nsec; /* timestamp in nano seconds */ + + union { + u8 generic[64]; + struct cpc_can_msg can_msg; + struct cpc_can_params can_params; + struct cpc_confirm confirmation; + struct cpc_overrun overrun; + struct cpc_can_error error; + struct cpc_can_err_counter err_counter; + u8 can_state; + } msg; +}; + +/* + * Table of devices that work with this driver + * NOTE: This driver supports only CPC-USB/ARM7 (LPC2119) yet. + */ +static struct usb_device_id ems_usb_table[] = { + {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_ARM7_PRODUCT_ID)}, + {} /* Terminating entry */ +}; + +MODULE_DEVICE_TABLE(usb, ems_usb_table); + +#define RX_BUFFER_SIZE 64 +#define CPC_HEADER_SIZE 4 +#define INTR_IN_BUFFER_SIZE 4 + +#define MAX_RX_URBS 10 +#define MAX_TX_URBS CAN_ECHO_SKB_MAX + +struct ems_usb; + +struct ems_tx_urb_context { + struct ems_usb *dev; + + u32 echo_index; + u8 dlc; +}; + +struct ems_usb { + struct can_priv can; /* must be the first member */ + int open_time; + + struct sk_buff *echo_skb[MAX_TX_URBS]; + + struct usb_device *udev; + struct net_device *netdev; + + atomic_t active_tx_urbs; + struct usb_anchor tx_submitted; + struct ems_tx_urb_context tx_contexts[MAX_TX_URBS]; + + struct usb_anchor rx_submitted; + + struct urb *intr_urb; + + u8 *tx_msg_buffer; + + u8 *intr_in_buffer; + unsigned int free_slots; /* remember number of available slots */ + + struct ems_cpc_msg active_params; /* active controller parameters */ +}; + +static void ems_usb_read_interrupt_callback(struct urb *urb) +{ + struct ems_usb *dev = urb->context; + struct net_device *netdev = dev->netdev; + int err; + + if (!netif_device_present(netdev)) + return; + + switch (urb->status) { + case 0: + dev->free_slots = dev->intr_in_buffer[1]; + break; + + case -ECONNRESET: /* unlink */ + case -ENOENT: + case -ESHUTDOWN: + return; + + default: + dev_info(netdev->dev.parent, "Rx interrupt aborted %d\n", + urb->status); + break; + } + + err = usb_submit_urb(urb, GFP_ATOMIC); + + if (err == -ENODEV) + netif_device_detach(netdev); + else if (err) + dev_err(netdev->dev.parent, + "failed resubmitting intr urb: %d\n", err); + + return; +} + +static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg) +{ + struct can_frame *cf; + struct sk_buff *skb; + int i; + struct net_device_stats *stats = &dev->netdev->stats; + + skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame)); + if (skb == NULL) + return; + + skb->protocol = htons(ETH_P_CAN); + + cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); + + cf->can_id = msg->msg.can_msg.id; + cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8); + + if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME + || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME) + cf->can_id |= CAN_EFF_FLAG; + + if (msg->type == CPC_MSG_TYPE_RTR_FRAME + || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME) { + cf->can_id |= CAN_RTR_FLAG; + } else { + for (i = 0; i < cf->can_dlc; i++) + cf->data[i] = msg->msg.can_msg.msg[i]; + } + + netif_rx(skb); + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; +} + +static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) +{ + struct can_frame *cf; + struct sk_buff *skb; + struct net_device_stats *stats = &dev->netdev->stats; + + skb = netdev_alloc_skb(dev->netdev, sizeof(struct can_frame)); + if (skb == NULL) + return; + + skb->protocol = htons(ETH_P_CAN); + + cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); + memset(cf, 0, sizeof(struct can_frame)); + + cf->can_id = CAN_ERR_FLAG; + cf->can_dlc = CAN_ERR_DLC; + + if (msg->type == CPC_MSG_TYPE_CAN_STATE) { + u8 state = msg->msg.can_state; + + if (state & SJA1000_SR_BS) { + dev->can.state = CAN_STATE_BUS_OFF; + cf->can_id |= CAN_ERR_BUSOFF; + + can_bus_off(dev->netdev); + } else if (state & SJA1000_SR_ES) { + dev->can.state = CAN_STATE_ERROR_WARNING; + dev->can.can_stats.error_warning++; + } else { + dev->can.state = CAN_STATE_ERROR_ACTIVE; + dev->can.can_stats.error_passive++; + } + } else if (msg->type == CPC_MSG_TYPE_CAN_FRAME_ERROR) { + u8 ecc = msg->msg.error.cc.regs.sja1000.ecc; + u8 txerr = msg->msg.error.cc.regs.sja1000.txerr; + u8 rxerr = msg->msg.error.cc.regs.sja1000.rxerr; + + /* bus error interrupt */ + dev->can.can_stats.bus_error++; + stats->rx_errors++; + + cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; + + switch (ecc & SJA1000_ECC_MASK) { + case SJA1000_ECC_BIT: + cf->data[2] |= CAN_ERR_PROT_BIT; + break; + case SJA1000_ECC_FORM: + cf->data[2] |= CAN_ERR_PROT_FORM; + break; + case SJA1000_ECC_STUFF: + cf->data[2] |= CAN_ERR_PROT_STUFF; + break; + default: + cf->data[2] |= CAN_ERR_PROT_UNSPEC; + cf->data[3] = ecc & SJA1000_ECC_SEG; + break; + } + + /* Error occured during transmission? */ + if ((ecc & SJA1000_ECC_DIR) == 0) + cf->data[2] |= CAN_ERR_PROT_TX; + + if (dev->can.state == CAN_STATE_ERROR_WARNING || + dev->can.state == CAN_STATE_ERROR_PASSIVE) { + cf->data[1] = (txerr > rxerr) ? + CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE; + } + } else if (msg->type == CPC_MSG_TYPE_OVERRUN) { + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; + + stats->rx_over_errors++; + stats->rx_errors++; + } + + netif_rx(skb); + + stats->rx_packets++; + stats->rx_bytes += cf->can_dlc; +} + +/* + * callback for bulk IN urb + */ +static void ems_usb_read_bulk_callback(struct urb *urb) +{ + struct ems_usb *dev = urb->context; + struct net_device *netdev; + int retval; + + netdev = dev->netdev; + + if (!netif_device_present(netdev)) + return; + + switch (urb->status) { + case 0: /* success */ + break; + + case -ENOENT: + return; + + default: + dev_info(netdev->dev.parent, "Rx URB aborted (%d)\n", + urb->status); + goto resubmit_urb; + } + + if (urb->actual_length > CPC_HEADER_SIZE) { + struct ems_cpc_msg *msg; + u8 *ibuf = urb->transfer_buffer; + u8 msg_count, again, start; + + msg_count = ibuf[0] & ~0x80; + again = ibuf[0] & 0x80; + + start = CPC_HEADER_SIZE; + + while (msg_count) { + msg = (struct ems_cpc_msg *)&ibuf[start]; + + switch (msg->type) { + case CPC_MSG_TYPE_CAN_STATE: + /* Process CAN state changes */ + ems_usb_rx_err(dev, msg); + break; + + case CPC_MSG_TYPE_CAN_FRAME: + case CPC_MSG_TYPE_EXT_CAN_FRAME: + case CPC_MSG_TYPE_RTR_FRAME: + case CPC_MSG_TYPE_EXT_RTR_FRAME: + ems_usb_rx_can_msg(dev, msg); + break; + + case CPC_MSG_TYPE_CAN_FRAME_ERROR: + /* Process errorframe */ + ems_usb_rx_err(dev, msg); + break; + + case CPC_MSG_TYPE_OVERRUN: + /* Message lost while receiving */ + ems_usb_rx_err(dev, msg); + break; + } + + start += CPC_MSG_HEADER_LEN + msg->length; + msg_count--; + + if (start > urb->transfer_buffer_length) { + dev_err(netdev->dev.parent, "format error\n"); + break; + } + } + } + +resubmit_urb: + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2), + urb->transfer_buffer, RX_BUFFER_SIZE, + ems_usb_read_bulk_callback, dev); + + retval = usb_submit_urb(urb, GFP_ATOMIC); + + if (retval == -ENODEV) + netif_device_detach(netdev); + else if (retval) + dev_err(netdev->dev.parent, + "failed resubmitting read bulk urb: %d\n", retval); + + return; +} + +/* + * callback for bulk IN urb + */ +static void ems_usb_write_bulk_callback(struct urb *urb) +{ + struct ems_tx_urb_context *context = urb->context; + struct ems_usb *dev; + struct net_device *netdev; + + BUG_ON(!context); + + dev = context->dev; + netdev = dev->netdev; + + /* free up our allocated buffer */ + usb_buffer_free(urb->dev, urb->transfer_buffer_length, + urb->transfer_buffer, urb->transfer_dma); + + atomic_dec(&dev->active_tx_urbs); + + if (!netif_device_present(netdev)) + return; + + if (urb->status) + dev_info(netdev->dev.parent, "Tx URB aborted (%d)\n", + urb->status); + + netdev->trans_start = jiffies; + + /* transmission complete interrupt */ + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += context->dlc; + + can_get_echo_skb(netdev, context->echo_index); + + /* Release context */ + context->echo_index = MAX_TX_URBS; + + if (netif_queue_stopped(netdev)) + netif_wake_queue(netdev); +} + +/* + * Send the given CPC command synchronously + */ +static int ems_usb_command_msg(struct ems_usb *dev, struct ems_cpc_msg *msg) +{ + int actual_length; + + /* Copy payload */ + memcpy(&dev->tx_msg_buffer[CPC_HEADER_SIZE], msg, + msg->length + CPC_MSG_HEADER_LEN); + + /* Clear header */ + memset(&dev->tx_msg_buffer[0], 0, CPC_HEADER_SIZE); + + return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2), + &dev->tx_msg_buffer[0], + msg->length + CPC_MSG_HEADER_LEN + CPC_HEADER_SIZE, + &actual_length, 1000); +} + +/* + * Change CAN controllers' mode register + */ +static int ems_usb_write_mode(struct ems_usb *dev, u8 mode) +{ + dev->active_params.msg.can_params.cc_params.sja1000.mode = mode; + + return ems_usb_command_msg(dev, &dev->active_params); +} + +/* + * Send a CPC_Control command to change behaviour when interface receives a CAN + * message, bus error or CAN state changed notifications. + */ +static int ems_usb_control_cmd(struct ems_usb *dev, u8 val) +{ + struct ems_cpc_msg cmd; + + cmd.type = CPC_CMD_TYPE_CONTROL; + cmd.length = CPC_MSG_HEADER_LEN + 1; + + cmd.msgid = 0; + + cmd.msg.generic[0] = val; + + return ems_usb_command_msg(dev, &cmd); +} + +/* + * Start interface + */ +static int ems_usb_start(struct ems_usb *dev) +{ + struct net_device *netdev = dev->netdev; + int err, i; + + dev->intr_in_buffer[0] = 0; + dev->free_slots = 15; /* initial size */ + + for (i = 0; i < MAX_RX_URBS; i++) { + struct urb *urb = NULL; + u8 *buf = NULL; + + /* create a URB, and a buffer for it */ + urb = usb_alloc_urb(0, GFP_KERNEL); + if (!urb) { + dev_err(netdev->dev.parent, + "No memory left for URBs\n"); + return -ENOMEM; + } + + buf = usb_buffer_alloc(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, + &urb->transfer_dma); + if (!buf) { + dev_err(netdev->dev.parent, + "No memory left for USB buffer\n"); + usb_free_urb(urb); + return -ENOMEM; + } + + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2), + buf, RX_BUFFER_SIZE, + ems_usb_read_bulk_callback, dev); + urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + usb_anchor_urb(urb, &dev->rx_submitted); + + err = usb_submit_urb(urb, GFP_KERNEL); + if (err) { + if (err == -ENODEV) + netif_device_detach(dev->netdev); + + usb_unanchor_urb(urb); + usb_buffer_free(dev->udev, RX_BUFFER_SIZE, buf, + urb->transfer_dma); + break; + } + + /* Drop reference, USB core will take care of freeing it */ + usb_free_urb(urb); + } + + /* Did we submit any URBs */ + if (i == 0) { + dev_warn(netdev->dev.parent, "couldn't setup read URBs\n"); + return err; + } + + /* Warn if we've couldn't transmit all the URBs */ + if (i < MAX_RX_URBS) + dev_warn(netdev->dev.parent, "rx performance may be slow\n"); + + /* Setup and start interrupt URB */ + usb_fill_int_urb(dev->intr_urb, dev->udev, + usb_rcvintpipe(dev->udev, 1), + dev->intr_in_buffer, + INTR_IN_BUFFER_SIZE, + ems_usb_read_interrupt_callback, dev, 1); + + err = usb_submit_urb(dev->intr_urb, GFP_KERNEL); + if (err) { + if (err == -ENODEV) + netif_device_detach(dev->netdev); + + dev_warn(netdev->dev.parent, "intr URB submit failed: %d\n", + err); + + return err; + } + + /* CPC-USB will transfer received message to host */ + err = ems_usb_control_cmd(dev, CONTR_CAN_MESSAGE | CONTR_CONT_ON); + if (err) + goto failed; + + /* CPC-USB will transfer CAN state changes to host */ + err = ems_usb_control_cmd(dev, CONTR_CAN_STATE | CONTR_CONT_ON); + if (err) + goto failed; + + /* CPC-USB will transfer bus errors to host */ + err = ems_usb_control_cmd(dev, CONTR_BUS_ERROR | CONTR_CONT_ON); + if (err) + goto failed; + + err = ems_usb_write_mode(dev, SJA1000_MOD_NORMAL); + if (err) + goto failed; + + dev->can.state = CAN_STATE_ERROR_ACTIVE; + + return 0; + +failed: + if (err == -ENODEV) + netif_device_detach(dev->netdev); + + dev_warn(netdev->dev.parent, "couldn't submit control: %d\n", err); + + return err; +} + +static void unlink_all_urbs(struct ems_usb *dev) +{ + int i; + + usb_unlink_urb(dev->intr_urb); + + usb_kill_anchored_urbs(&dev->rx_submitted); + + usb_kill_anchored_urbs(&dev->tx_submitted); + atomic_set(&dev->active_tx_urbs, 0); + + for (i = 0; i < MAX_TX_URBS; i++) + dev->tx_contexts[i].echo_index = MAX_TX_URBS; +} + +static int ems_usb_open(struct net_device *netdev) +{ + struct ems_usb *dev = netdev_priv(netdev); + int err; + + err = ems_usb_write_mode(dev, SJA1000_MOD_RM); + if (err) + return err; + + /* common open */ + err = open_candev(netdev); + if (err) + return err; + + /* finally start device */ + err = ems_usb_start(dev); + if (err) { + if (err == -ENODEV) + netif_device_detach(dev->netdev); + + dev_warn(netdev->dev.parent, "couldn't start device: %d\n", + err); + + close_candev(netdev); + + return err; + } + + dev->open_time = jiffies; + + netif_start_queue(netdev); + + return 0; +} + +static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ems_usb *dev = netdev_priv(netdev); + struct ems_tx_urb_context *context = NULL; + struct net_device_stats *stats = &netdev->stats; + struct can_frame *cf = (struct can_frame *)skb->data; + struct ems_cpc_msg *msg; + struct urb *urb; + u8 *buf; + int i, err; + size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN + + sizeof(struct cpc_can_msg); + + /* create a URB, and a buffer for it, and copy the data to the URB */ + urb = usb_alloc_urb(0, GFP_ATOMIC); + if (!urb) { + dev_err(netdev->dev.parent, "No memory left for URBs\n"); + goto nomem; + } + + buf = usb_buffer_alloc(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma); + if (!buf) { + dev_err(netdev->dev.parent, "No memory left for USB buffer\n"); + usb_free_urb(urb); + goto nomem; + } + + msg = (struct ems_cpc_msg *)&buf[CPC_HEADER_SIZE]; + + msg->msg.can_msg.id = cf->can_id & CAN_ERR_MASK; + msg->msg.can_msg.length = cf->can_dlc; + + if (cf->can_id & CAN_RTR_FLAG) { + msg->type = cf->can_id & CAN_EFF_FLAG ? + CPC_CMD_TYPE_EXT_RTR_FRAME : CPC_CMD_TYPE_RTR_FRAME; + + msg->length = CPC_CAN_MSG_MIN_SIZE; + } else { + msg->type = cf->can_id & CAN_EFF_FLAG ? + CPC_CMD_TYPE_EXT_CAN_FRAME : CPC_CMD_TYPE_CAN_FRAME; + + for (i = 0; i < cf->can_dlc; i++) + msg->msg.can_msg.msg[i] = cf->data[i]; + + msg->length = CPC_CAN_MSG_MIN_SIZE + cf->can_dlc; + } + + for (i = 0; i < MAX_TX_URBS; i++) { + if (dev->tx_contexts[i].echo_index == MAX_TX_URBS) { + context = &dev->tx_contexts[i]; + break; + } + } + + /* + * May never happen! When this happens we'd more URBs in flight as + * allowed (MAX_TX_URBS). + */ + if (!context) { + usb_unanchor_urb(urb); + usb_buffer_free(dev->udev, size, buf, urb->transfer_dma); + + dev_warn(netdev->dev.parent, "couldn't find free context\n"); + + return NETDEV_TX_BUSY; + } + + context->dev = dev; + context->echo_index = i; + context->dlc = cf->can_dlc; + + usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), buf, + size, ems_usb_write_bulk_callback, context); + urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; + usb_anchor_urb(urb, &dev->tx_submitted); + + can_put_echo_skb(skb, netdev, context->echo_index); + + atomic_inc(&dev->active_tx_urbs); + + err = usb_submit_urb(urb, GFP_ATOMIC); + if (unlikely(err)) { + can_free_echo_skb(netdev, context->echo_index); + + usb_unanchor_urb(urb); + usb_buffer_free(dev->udev, size, buf, urb->transfer_dma); + dev_kfree_skb(skb); + + atomic_dec(&dev->active_tx_urbs); + + if (err == -ENODEV) { + netif_device_detach(netdev); + } else { + dev_warn(netdev->dev.parent, "failed tx_urb %d\n", err); + + stats->tx_dropped++; + } + } else { + netdev->trans_start = jiffies; + + /* Slow down tx path */ + if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS || + dev->free_slots < 5) { + netif_stop_queue(netdev); + } + } + + /* + * Release our reference to this URB, the USB core will eventually free + * it entirely. + */ + usb_free_urb(urb); + + return NETDEV_TX_OK; + +nomem: + if (skb) + dev_kfree_skb(skb); + + stats->tx_dropped++; + + return NETDEV_TX_OK; +} + +static int ems_usb_close(struct net_device *netdev) +{ + struct ems_usb *dev = netdev_priv(netdev); + + /* Stop polling */ + unlink_all_urbs(dev); + + netif_stop_queue(netdev); + + /* Set CAN controller to reset mode */ + if (ems_usb_write_mode(dev, SJA1000_MOD_RM)) + dev_warn(netdev->dev.parent, "couldn't stop device"); + + close_candev(netdev); + + dev->open_time = 0; + + return 0; +} + +static const struct net_device_ops ems_usb_netdev_ops = { + .ndo_open = ems_usb_open, + .ndo_stop = ems_usb_close, + .ndo_start_xmit = ems_usb_start_xmit, +}; + +static struct can_bittiming_const ems_usb_bittiming_const = { + .name = "ems_usb", + .tseg1_min = 1, + .tseg1_max = 16, + .tseg2_min = 1, + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 64, + .brp_inc = 1, +}; + +static int ems_usb_set_mode(struct net_device *netdev, enum can_mode mode) +{ + struct ems_usb *dev = netdev_priv(netdev); + + if (!dev->open_time) + return -EINVAL; + + switch (mode) { + case CAN_MODE_START: + if (ems_usb_write_mode(dev, SJA1000_MOD_NORMAL)) + dev_warn(netdev->dev.parent, "couldn't start device"); + + if (netif_queue_stopped(netdev)) + netif_wake_queue(netdev); + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int ems_usb_set_bittiming(struct net_device *netdev) +{ + struct ems_usb *dev = netdev_priv(netdev); + struct can_bittiming *bt = &dev->can.bittiming; + u8 btr0, btr1; + + btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6); + btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) | + (((bt->phase_seg2 - 1) & 0x7) << 4); + if (dev->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) + btr1 |= 0x80; + + dev_info(netdev->dev.parent, "setting BTR0=0x%02x BTR1=0x%02x\n", + btr0, btr1); + + dev->active_params.msg.can_params.cc_params.sja1000.btr0 = btr0; + dev->active_params.msg.can_params.cc_params.sja1000.btr1 = btr1; + + return ems_usb_command_msg(dev, &dev->active_params); +} + +static void init_params_sja1000(struct ems_cpc_msg *msg) +{ + struct cpc_sja1000_params *sja1000 = + &msg->msg.can_params.cc_params.sja1000; + + msg->type = CPC_CMD_TYPE_CAN_PARAMS; + msg->length = sizeof(struct cpc_can_params); + msg->msgid = 0; + + msg->msg.can_params.cc_type = CPC_CC_TYPE_SJA1000; + + /* Acceptance filter open */ + sja1000->acc_code0 = 0x00; + sja1000->acc_code1 = 0x00; + sja1000->acc_code2 = 0x00; + sja1000->acc_code3 = 0x00; + + /* Acceptance filter open */ + sja1000->acc_mask0 = 0xFF; + sja1000->acc_mask1 = 0xFF; + sja1000->acc_mask2 = 0xFF; + sja1000->acc_mask3 = 0xFF; + + sja1000->btr0 = 0; + sja1000->btr1 = 0; + + sja1000->outp_contr = SJA1000_DEFAULT_OUTPUT_CONTROL; + sja1000->mode = SJA1000_MOD_RM; +} + +/* + * probe function for new CPC-USB devices + */ +static int ems_usb_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct net_device *netdev; + struct ems_usb *dev; + int i, err = -ENOMEM; + + netdev = alloc_candev(sizeof(struct ems_usb)); + if (!netdev) { + dev_err(netdev->dev.parent, "Couldn't alloc candev\n"); + return -ENOMEM; + } + + dev = netdev_priv(netdev); + + dev->udev = interface_to_usbdev(intf); + dev->netdev = netdev; + + dev->can.state = CAN_STATE_STOPPED; + dev->can.clock.freq = EMS_USB_ARM7_CLOCK; + dev->can.bittiming_const = &ems_usb_bittiming_const; + dev->can.do_set_bittiming = ems_usb_set_bittiming; + dev->can.do_set_mode = ems_usb_set_mode; + + netdev->flags |= IFF_ECHO; /* we support local echo */ + + netdev->netdev_ops = &ems_usb_netdev_ops; + + netdev->flags |= IFF_ECHO; /* we support local echo */ + + init_usb_anchor(&dev->rx_submitted); + + init_usb_anchor(&dev->tx_submitted); + atomic_set(&dev->active_tx_urbs, 0); + + for (i = 0; i < MAX_TX_URBS; i++) + dev->tx_contexts[i].echo_index = MAX_TX_URBS; + + dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!dev->intr_urb) { + dev_err(netdev->dev.parent, "Couldn't alloc intr URB\n"); + goto cleanup_candev; + } + + dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL); + if (!dev->intr_in_buffer) { + dev_err(netdev->dev.parent, "Couldn't alloc Intr buffer\n"); + goto cleanup_intr_urb; + } + + dev->tx_msg_buffer = kzalloc(CPC_HEADER_SIZE + + sizeof(struct ems_cpc_msg), GFP_KERNEL); + if (!dev->tx_msg_buffer) { + dev_err(netdev->dev.parent, "Couldn't alloc Tx buffer\n"); + goto cleanup_intr_in_buffer; + } + + usb_set_intfdata(intf, dev); + + SET_NETDEV_DEV(netdev, &intf->dev); + + init_params_sja1000(&dev->active_params); + + err = ems_usb_command_msg(dev, &dev->active_params); + if (err) { + dev_err(netdev->dev.parent, + "couldn't initialize controller: %d\n", err); + goto cleanup_tx_msg_buffer; + } + + err = register_candev(netdev); + if (err) { + dev_err(netdev->dev.parent, + "couldn't register CAN device: %d\n", err); + goto cleanup_tx_msg_buffer; + } + + return 0; + +cleanup_tx_msg_buffer: + kfree(dev->tx_msg_buffer); + +cleanup_intr_in_buffer: + kfree(dev->intr_in_buffer); + +cleanup_intr_urb: + usb_free_urb(dev->intr_urb); + +cleanup_candev: + free_candev(netdev); + + return err; +} + +/* + * called by the usb core when the device is removed from the system + */ +static void ems_usb_disconnect(struct usb_interface *intf) +{ + struct ems_usb *dev = usb_get_intfdata(intf); + + usb_set_intfdata(intf, NULL); + + if (dev) { + unregister_netdev(dev->netdev); + free_candev(dev->netdev); + + unlink_all_urbs(dev); + + usb_free_urb(dev->intr_urb); + + kfree(dev->intr_in_buffer); + } +} + +/* usb specific object needed to register this driver with the usb subsystem */ +static struct usb_driver ems_usb_driver = { + .name = "ems_usb", + .probe = ems_usb_probe, + .disconnect = ems_usb_disconnect, + .id_table = ems_usb_table, +}; + +static int __init ems_usb_init(void) +{ + int err; + + printk(KERN_INFO "CPC-USB kernel driver loaded\n"); + + /* register this driver with the USB subsystem */ + err = usb_register(&ems_usb_driver); + + if (err) { + err("usb_register failed. Error number %d\n", err); + return err; + } + + return 0; +} + +static void __exit ems_usb_exit(void) +{ + /* deregister this driver with the USB subsystem */ + usb_deregister(&ems_usb_driver); +} + +module_init(ems_usb_init); +module_exit(ems_usb_exit); diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c index d45eacb7670..211c8e9182f 100644 --- a/drivers/net/cnic.c +++ b/drivers/net/cnic.c @@ -85,8 +85,6 @@ static int cnic_uio_open(struct uio_info *uinfo, struct inode *inode) cp->uio_dev = iminor(inode); - cnic_shutdown_bnx2_rx_ring(dev); - cnic_init_bnx2_tx_ring(dev); cnic_init_bnx2_rx_ring(dev); @@ -98,6 +96,8 @@ static int cnic_uio_close(struct uio_info *uinfo, struct inode *inode) struct cnic_dev *dev = uinfo->priv; struct cnic_local *cp = dev->cnic_priv; + cnic_shutdown_bnx2_rx_ring(dev); + cp->uio_dev = -1; return 0; } diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c index 3e3fab8afb1..61f9da2b494 100644 --- a/drivers/net/cpmac.c +++ b/drivers/net/cpmac.c @@ -1109,7 +1109,7 @@ static int external_switch; static int __devinit cpmac_probe(struct platform_device *pdev) { int rc, phy_id; - char mdio_bus_id[BUS_ID_SIZE]; + char mdio_bus_id[MII_BUS_ID_SIZE]; struct resource *mem; struct cpmac_priv *priv; struct net_device *dev; @@ -1118,7 +1118,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev) pdata = pdev->dev.platform_data; if (external_switch || dumb_switch) { - strncpy(mdio_bus_id, "0", BUS_ID_SIZE); /* fixed phys bus */ + strncpy(mdio_bus_id, "0", MII_BUS_ID_SIZE); /* fixed phys bus */ phy_id = pdev->id; } else { for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) { @@ -1126,7 +1126,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev) continue; if (!cpmac_mii->phy_map[phy_id]) continue; - strncpy(mdio_bus_id, cpmac_mii->id, BUS_ID_SIZE); + strncpy(mdio_bus_id, cpmac_mii->id, MII_BUS_ID_SIZE); break; } } @@ -1167,7 +1167,7 @@ static int __devinit cpmac_probe(struct platform_device *pdev) priv->msg_enable = netif_msg_init(debug_level, 0xff); memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr)); - snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id); + snprintf(priv->phy_name, MII_BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id); priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link, 0, PHY_INTERFACE_MODE_MII); diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index 977c3d35827..41bd7aeafd8 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -3083,7 +3083,6 @@ static const struct net_device_ops ehea_netdev_ops = { .ndo_poll_controller = ehea_netpoll, #endif .ndo_get_stats = ehea_get_stats, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = ehea_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = ehea_set_multicast_list, diff --git a/drivers/net/igb/e1000_mac.c b/drivers/net/igb/e1000_mac.c index a0231cd079f..7d76bb085e1 100644 --- a/drivers/net/igb/e1000_mac.c +++ b/drivers/net/igb/e1000_mac.c @@ -286,41 +286,6 @@ void igb_mta_set(struct e1000_hw *hw, u32 hash_value) } /** - * igb_update_mc_addr_list - Update Multicast addresses - * @hw: pointer to the HW structure - * @mc_addr_list: array of multicast addresses to program - * @mc_addr_count: number of multicast addresses to program - * - * Updates entire Multicast Table Array. - * The caller must have a packed mc_addr_list of multicast addresses. - **/ -void igb_update_mc_addr_list(struct e1000_hw *hw, - u8 *mc_addr_list, u32 mc_addr_count) -{ - u32 hash_value, hash_bit, hash_reg; - int i; - - /* clear mta_shadow */ - memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); - - /* update mta_shadow from mc_addr_list */ - for (i = 0; (u32) i < mc_addr_count; i++) { - hash_value = igb_hash_mc_addr(hw, mc_addr_list); - - hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); - hash_bit = hash_value & 0x1F; - - hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); - mc_addr_list += (ETH_ALEN); - } - - /* replace the entire MTA table */ - for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) - array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); - wrfl(); -} - -/** * igb_hash_mc_addr - Generate a multicast hash value * @hw: pointer to the HW structure * @mc_addr: pointer to a multicast address @@ -329,7 +294,7 @@ void igb_update_mc_addr_list(struct e1000_hw *hw, * the multicast filter table array address and new table value. See * igb_mta_set() **/ -u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) +static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) { u32 hash_value, hash_mask; u8 bit_shift = 0; @@ -392,6 +357,41 @@ u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) } /** + * igb_update_mc_addr_list - Update Multicast addresses + * @hw: pointer to the HW structure + * @mc_addr_list: array of multicast addresses to program + * @mc_addr_count: number of multicast addresses to program + * + * Updates entire Multicast Table Array. + * The caller must have a packed mc_addr_list of multicast addresses. + **/ +void igb_update_mc_addr_list(struct e1000_hw *hw, + u8 *mc_addr_list, u32 mc_addr_count) +{ + u32 hash_value, hash_bit, hash_reg; + int i; + + /* clear mta_shadow */ + memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); + + /* update mta_shadow from mc_addr_list */ + for (i = 0; (u32) i < mc_addr_count; i++) { + hash_value = igb_hash_mc_addr(hw, mc_addr_list); + + hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); + hash_bit = hash_value & 0x1F; + + hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); + mc_addr_list += (ETH_ALEN); + } + + /* replace the entire MTA table */ + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) + array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); + wrfl(); +} + +/** * igb_clear_hw_cntrs_base - Clear base hardware counters * @hw: pointer to the HW structure * diff --git a/drivers/net/igb/e1000_mac.h b/drivers/net/igb/e1000_mac.h index 7518af8cbbf..bca17d88241 100644 --- a/drivers/net/igb/e1000_mac.h +++ b/drivers/net/igb/e1000_mac.h @@ -88,6 +88,5 @@ enum e1000_mng_mode { #define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 extern void e1000_init_function_pointers_82575(struct e1000_hw *hw); -extern u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); #endif diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index dd688d45e9c..385be601666 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -267,7 +267,8 @@ struct ixgbe_adapter { enum ixgbe_fc_mode last_lfc_mode; /* Interrupt Throttle Rate */ - u32 itr_setting; + u32 rx_itr_setting; + u32 tx_itr_setting; u16 eitr_low; u16 eitr_high; @@ -351,7 +352,8 @@ struct ixgbe_adapter { struct ixgbe_hw_stats stats; /* Interrupt Throttle Rate */ - u32 eitr_param; + u32 rx_eitr_param; + u32 tx_eitr_param; unsigned long state; u64 tx_busy; diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 026e94a9984..53b0a668025 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -1929,7 +1929,7 @@ static int ixgbe_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = adapter->tx_ring[0].work_limit; /* only valid if in constant ITR mode */ - switch (adapter->itr_setting) { + switch (adapter->rx_itr_setting) { case 0: /* throttling disabled */ ec->rx_coalesce_usecs = 0; @@ -1940,9 +1940,25 @@ static int ixgbe_get_coalesce(struct net_device *netdev, break; default: /* fixed interrupt rate mode */ - ec->rx_coalesce_usecs = 1000000/adapter->eitr_param; + ec->rx_coalesce_usecs = 1000000/adapter->rx_eitr_param; break; } + + /* only valid if in constant ITR mode */ + switch (adapter->tx_itr_setting) { + case 0: + /* throttling disabled */ + ec->tx_coalesce_usecs = 0; + break; + case 1: + /* dynamic ITR mode */ + ec->tx_coalesce_usecs = 1; + break; + default: + ec->tx_coalesce_usecs = 1000000/adapter->tx_eitr_param; + break; + } + return 0; } @@ -1953,6 +1969,14 @@ static int ixgbe_set_coalesce(struct net_device *netdev, struct ixgbe_q_vector *q_vector; int i; + /* + * don't accept tx specific changes if we've got mixed RxTx vectors + * test and jump out here if needed before changing the rx numbers + */ + if ((1000000/ec->tx_coalesce_usecs) != adapter->tx_eitr_param && + adapter->q_vector[0]->txr_count && adapter->q_vector[0]->rxr_count) + return -EINVAL; + if (ec->tx_max_coalesced_frames_irq) adapter->tx_ring[0].work_limit = ec->tx_max_coalesced_frames_irq; @@ -1963,26 +1987,49 @@ static int ixgbe_set_coalesce(struct net_device *netdev, return -EINVAL; /* store the value in ints/second */ - adapter->eitr_param = 1000000/ec->rx_coalesce_usecs; + adapter->rx_eitr_param = 1000000/ec->rx_coalesce_usecs; /* static value of interrupt rate */ - adapter->itr_setting = adapter->eitr_param; + adapter->rx_itr_setting = adapter->rx_eitr_param; /* clear the lower bit as its used for dynamic state */ - adapter->itr_setting &= ~1; + adapter->rx_itr_setting &= ~1; } else if (ec->rx_coalesce_usecs == 1) { /* 1 means dynamic mode */ - adapter->eitr_param = 20000; - adapter->itr_setting = 1; + adapter->rx_eitr_param = 20000; + adapter->rx_itr_setting = 1; } else { /* * any other value means disable eitr, which is best * served by setting the interrupt rate very high */ if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) - adapter->eitr_param = IXGBE_MAX_RSC_INT_RATE; + adapter->rx_eitr_param = IXGBE_MAX_RSC_INT_RATE; else - adapter->eitr_param = IXGBE_MAX_INT_RATE; - adapter->itr_setting = 0; + adapter->rx_eitr_param = IXGBE_MAX_INT_RATE; + adapter->rx_itr_setting = 0; + } + + if (ec->tx_coalesce_usecs > 1) { + /* check the limits */ + if ((1000000/ec->tx_coalesce_usecs > IXGBE_MAX_INT_RATE) || + (1000000/ec->tx_coalesce_usecs < IXGBE_MIN_INT_RATE)) + return -EINVAL; + + /* store the value in ints/second */ + adapter->tx_eitr_param = 1000000/ec->tx_coalesce_usecs; + + /* static value of interrupt rate */ + adapter->tx_itr_setting = adapter->tx_eitr_param; + + /* clear the lower bit as its used for dynamic state */ + adapter->tx_itr_setting &= ~1; + } else if (ec->tx_coalesce_usecs == 1) { + /* 1 means dynamic mode */ + adapter->tx_eitr_param = 10000; + adapter->tx_itr_setting = 1; + } else { + adapter->tx_eitr_param = IXGBE_MAX_INT_RATE; + adapter->tx_itr_setting = 0; } /* MSI/MSIx Interrupt Mode */ @@ -1992,17 +2039,17 @@ static int ixgbe_set_coalesce(struct net_device *netdev, for (i = 0; i < num_vectors; i++) { q_vector = adapter->q_vector[i]; if (q_vector->txr_count && !q_vector->rxr_count) - /* tx vector gets half the rate */ - q_vector->eitr = (adapter->eitr_param >> 1); + /* tx only */ + q_vector->eitr = adapter->tx_eitr_param; else /* rx only or mixed */ - q_vector->eitr = adapter->eitr_param; + q_vector->eitr = adapter->rx_eitr_param; ixgbe_write_eitr(q_vector); } /* Legacy Interrupt Mode */ } else { q_vector = adapter->q_vector[0]; - q_vector->eitr = adapter->eitr_param; + q_vector->eitr = adapter->rx_eitr_param; ixgbe_write_eitr(q_vector); } diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 59ad9590e70..c407bd9de0d 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -926,12 +926,12 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) r_idx + 1); } - /* if this is a tx only vector halve the interrupt rate */ if (q_vector->txr_count && !q_vector->rxr_count) - q_vector->eitr = (adapter->eitr_param >> 1); + /* tx only */ + q_vector->eitr = adapter->tx_eitr_param; else if (q_vector->rxr_count) - /* rx only */ - q_vector->eitr = adapter->eitr_param; + /* rx or mixed */ + q_vector->eitr = adapter->rx_eitr_param; ixgbe_write_eitr(q_vector); } @@ -1359,7 +1359,7 @@ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget) /* If all Rx work done, exit the polling mode */ if (work_done < budget) { napi_complete(napi); - if (adapter->itr_setting & 1) + if (adapter->rx_itr_setting & 1) ixgbe_set_itr_msix(q_vector); if (!test_bit(__IXGBE_DOWN, &adapter->state)) ixgbe_irq_enable_queues(adapter, @@ -1420,7 +1420,7 @@ static int ixgbe_clean_rxtx_many(struct napi_struct *napi, int budget) /* If all Rx work done, exit the polling mode */ if (work_done < budget) { napi_complete(napi); - if (adapter->itr_setting & 1) + if (adapter->rx_itr_setting & 1) ixgbe_set_itr_msix(q_vector); if (!test_bit(__IXGBE_DOWN, &adapter->state)) ixgbe_irq_enable_queues(adapter, @@ -1458,10 +1458,10 @@ static int ixgbe_clean_txonly(struct napi_struct *napi, int budget) if (!ixgbe_clean_tx_irq(q_vector, tx_ring)) work_done = budget; - /* If all Rx work done, exit the polling mode */ + /* If all Tx work done, exit the polling mode */ if (work_done < budget) { napi_complete(napi); - if (adapter->itr_setting & 1) + if (adapter->tx_itr_setting & 1) ixgbe_set_itr_msix(q_vector); if (!test_bit(__IXGBE_DOWN, &adapter->state)) ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx)); @@ -1848,7 +1848,7 @@ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; IXGBE_WRITE_REG(hw, IXGBE_EITR(0), - EITR_INTS_PER_SEC_TO_REG(adapter->eitr_param)); + EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr_param)); ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 1, 0, 0); @@ -1970,6 +1970,50 @@ static u32 ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) } /** + * ixgbe_configure_rscctl - enable RSC for the indicated ring + * @adapter: address of board private structure + * @index: index of ring to set + * @rx_buf_len: rx buffer length + **/ +static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, int index, + int rx_buf_len) +{ + struct ixgbe_ring *rx_ring; + struct ixgbe_hw *hw = &adapter->hw; + int j; + u32 rscctrl; + + rx_ring = &adapter->rx_ring[index]; + j = rx_ring->reg_idx; + rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j)); + rscctrl |= IXGBE_RSCCTL_RSCEN; + /* + * we must limit the number of descriptors so that the + * total size of max desc * buf_len is not greater + * than 65535 + */ + if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) { +#if (MAX_SKB_FRAGS > 16) + rscctrl |= IXGBE_RSCCTL_MAXDESC_16; +#elif (MAX_SKB_FRAGS > 8) + rscctrl |= IXGBE_RSCCTL_MAXDESC_8; +#elif (MAX_SKB_FRAGS > 4) + rscctrl |= IXGBE_RSCCTL_MAXDESC_4; +#else + rscctrl |= IXGBE_RSCCTL_MAXDESC_1; +#endif + } else { + if (rx_buf_len < IXGBE_RXBUFFER_4096) + rscctrl |= IXGBE_RSCCTL_MAXDESC_16; + else if (rx_buf_len < IXGBE_RXBUFFER_8192) + rscctrl |= IXGBE_RSCCTL_MAXDESC_8; + else + rscctrl |= IXGBE_RSCCTL_MAXDESC_4; + } + IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl); +} + +/** * ixgbe_configure_rx - Configure 8259x Receive Unit after Reset * @adapter: board private structure * @@ -1990,7 +2034,6 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) u32 fctrl, hlreg0; u32 reta = 0, mrqc = 0; u32 rdrxctl; - u32 rscctrl; int rx_buf_len; /* Decide whether to use packet split mode or not */ @@ -2148,36 +2191,9 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { /* Enable 82599 HW-RSC */ - for (i = 0; i < adapter->num_rx_queues; i++) { - rx_ring = &adapter->rx_ring[i]; - j = rx_ring->reg_idx; - rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(j)); - rscctrl |= IXGBE_RSCCTL_RSCEN; - /* - * we must limit the number of descriptors so that the - * total size of max desc * buf_len is not greater - * than 65535 - */ - if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) { -#if (MAX_SKB_FRAGS > 16) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (MAX_SKB_FRAGS > 8) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#elif (MAX_SKB_FRAGS > 4) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; -#else - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; -#endif - } else { - if (rx_buf_len < IXGBE_RXBUFFER_4096) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rx_buf_len < IXGBE_RXBUFFER_8192) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - } - IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(j), rscctrl); - } + for (i = 0; i < adapter->num_rx_queues; i++) + ixgbe_configure_rscctl(adapter, i, rx_buf_len); + /* Disable RSC for ACK packets */ IXGBE_WRITE_REG(hw, IXGBE_RSCDBU, (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU))); @@ -2926,6 +2942,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) ixgbe_napi_disable_all(adapter); + clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state); + del_timer_sync(&adapter->sfp_timer); del_timer_sync(&adapter->watchdog_timer); cancel_work_sync(&adapter->watchdog_task); @@ -2989,7 +3007,7 @@ static int ixgbe_poll(struct napi_struct *napi, int budget) /* If budget not fully consumed, exit the polling mode */ if (work_done < budget) { napi_complete(napi); - if (adapter->itr_setting & 1) + if (adapter->rx_itr_setting & 1) ixgbe_set_itr(adapter); if (!test_bit(__IXGBE_DOWN, &adapter->state)) ixgbe_irq_enable_queues(adapter, IXGBE_EIMS_RTX_QUEUE); @@ -3599,7 +3617,10 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter) if (!q_vector) goto err_out; q_vector->adapter = adapter; - q_vector->eitr = adapter->eitr_param; + if (q_vector->txr_count && !q_vector->rxr_count) + q_vector->eitr = adapter->tx_eitr_param; + else + q_vector->eitr = adapter->rx_eitr_param; q_vector->v_idx = q_idx; netif_napi_add(adapter->netdev, &q_vector->napi, (*poll), 64); adapter->q_vector[q_idx] = q_vector; @@ -3868,8 +3889,10 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) hw->fc.disable_fc_autoneg = false; /* enable itr by default in dynamic mode */ - adapter->itr_setting = 1; - adapter->eitr_param = 20000; + adapter->rx_itr_setting = 1; + adapter->rx_eitr_param = 20000; + adapter->tx_itr_setting = 1; + adapter->tx_eitr_param = 10000; /* set defaults for eitr in MegaBytes */ adapter->eitr_low = 10; diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index f7bdde111df..b5aa974827e 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1469,6 +1469,7 @@ netxen_nic_resume(struct pci_dev *pdev) } netxen_schedule_work(adapter, netxen_fw_poll_work, FW_POLL_DELAY); + return 0; err_out_detach: netxen_nic_detach(adapter); @@ -1903,12 +1904,13 @@ static void netxen_tx_timeout_task(struct work_struct *work) netif_wake_queue(adapter->netdev); - goto done; + clear_bit(__NX_RESETTING, &adapter->state); } else { + clear_bit(__NX_RESETTING, &adapter->state); if (!netxen_nic_reset_context(adapter)) { adapter->netdev->trans_start = jiffies; - goto done; + return; } /* context reset failed, fall through for fw reset */ @@ -1916,8 +1918,6 @@ static void netxen_tx_timeout_task(struct work_struct *work) request_reset: adapter->need_fw_reset = 1; -done: - clear_bit(__NX_RESETTING, &adapter->state); } struct net_device_stats *netxen_nic_get_stats(struct net_device *netdev) diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 97db1c73234..474876c879c 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -340,12 +340,11 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) base = &virt[hw_info[i].offset & (req.Size-1)]; if ((readb(base+0) == hw_info[i].a0) && (readb(base+2) == hw_info[i].a1) && - (readb(base+4) == hw_info[i].a2)) - break; - } - if (i < NR_INFO) { - for (j = 0; j < 6; j++) - dev->dev_addr[j] = readb(base + (j<<1)); + (readb(base+4) == hw_info[i].a2)) { + for (j = 0; j < 6; j++) + dev->dev_addr[j] = readb(base + (j<<1)); + break; + } } iounmap(virt); diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index 07a7e4b8f8f..cc4b2f99989 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -884,13 +884,12 @@ static int efx_wanted_rx_queues(void) int count; int cpu; - if (unlikely(!alloc_cpumask_var(&core_mask, GFP_KERNEL))) { + if (unlikely(!zalloc_cpumask_var(&core_mask, GFP_KERNEL))) { printk(KERN_WARNING "sfc: RSS disabled due to allocation failure\n"); return 1; } - cpumask_clear(core_mask); count = 0; for_each_online_cpu(cpu) { if (!cpumask_test_cpu(cpu, core_mask)) { diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 15140f9f2e9..ef1165718dd 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1497,7 +1497,6 @@ static int sky2_up(struct net_device *dev) if (ramsize > 0) { u32 rxspace; - hw->flags |= SKY2_HW_RAM_BUFFER; pr_debug(PFX "%s: ram buffer %dK\n", dev->name, ramsize); if (ramsize < 16) rxspace = ramsize / 2; @@ -2926,6 +2925,9 @@ static int __devinit sky2_init(struct sky2_hw *hw) ++hw->ports; } + if (sky2_read8(hw, B2_E_0)) + hw->flags |= SKY2_HW_RAM_BUFFER; + return 0; } diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c index f1e5e4542c2..bc74db0d12f 100644 --- a/drivers/net/sunvnet.c +++ b/drivers/net/sunvnet.c @@ -1016,7 +1016,6 @@ static const struct net_device_ops vnet_ops = { .ndo_open = vnet_open, .ndo_stop = vnet_close, .ndo_set_multicast_list = vnet_set_rx_mode, - .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = vnet_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = vnet_tx_timeout, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d3ee1994b02..4fdfa2ae541 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -946,8 +946,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) char *name; unsigned long flags = 0; - err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) return -EPERM; err = security_tun_dev_create(); @@ -964,7 +962,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) flags |= TUN_TAP_DEV; name = "tap%d"; } else - goto failed; + return -EINVAL; if (*ifr->ifr_name) name = ifr->ifr_name; diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index e2a39b9be96..e391ef969c2 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -263,6 +263,7 @@ static int kaweth_control(struct kaweth_device *kaweth, int timeout) { struct usb_ctrlrequest *dr; + int retval; dbg("kaweth_control()"); @@ -278,18 +279,21 @@ static int kaweth_control(struct kaweth_device *kaweth, return -ENOMEM; } - dr->bRequestType= requesttype; + dr->bRequestType = requesttype; dr->bRequest = request; dr->wValue = cpu_to_le16(value); dr->wIndex = cpu_to_le16(index); dr->wLength = cpu_to_le16(size); - return kaweth_internal_control_msg(kaweth->dev, - pipe, - dr, - data, - size, - timeout); + retval = kaweth_internal_control_msg(kaweth->dev, + pipe, + dr, + data, + size, + timeout); + + kfree(dr); + return retval; } /**************************************************************** diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 938fb3530a7..c6c922247d0 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1227,7 +1227,7 @@ static const struct driver_info smsc95xx_info = { .rx_fixup = smsc95xx_rx_fixup, .tx_fixup = smsc95xx_tx_fixup, .status = smsc95xx_status, - .flags = FLAG_ETHER, + .flags = FLAG_ETHER | FLAG_SEND_ZLP, }; static const struct usb_device_id products[] = { @@ -1237,10 +1237,75 @@ static const struct usb_device_id products[] = { .driver_info = (unsigned long) &smsc95xx_info, }, { + /* SMSC9505 USB Ethernet Device */ + USB_DEVICE(0x0424, 0x9505), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9500A USB Ethernet Device */ + USB_DEVICE(0x0424, 0x9E00), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9505A USB Ethernet Device */ + USB_DEVICE(0x0424, 0x9E01), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { /* SMSC9512/9514 USB Hub & Ethernet Device */ USB_DEVICE(0x0424, 0xec00), .driver_info = (unsigned long) &smsc95xx_info, }, + { + /* SMSC9500 USB Ethernet Device (SAL10) */ + USB_DEVICE(0x0424, 0x9900), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9505 USB Ethernet Device (SAL10) */ + USB_DEVICE(0x0424, 0x9901), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9500A USB Ethernet Device (SAL10) */ + USB_DEVICE(0x0424, 0x9902), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9505A USB Ethernet Device (SAL10) */ + USB_DEVICE(0x0424, 0x9903), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9512/9514 USB Hub & Ethernet Device (SAL10) */ + USB_DEVICE(0x0424, 0x9904), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9500A USB Ethernet Device (HAL) */ + USB_DEVICE(0x0424, 0x9905), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9505A USB Ethernet Device (HAL) */ + USB_DEVICE(0x0424, 0x9906), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9500 USB Ethernet Device (Alternate ID) */ + USB_DEVICE(0x0424, 0x9907), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9500A USB Ethernet Device (Alternate ID) */ + USB_DEVICE(0x0424, 0x9908), + .driver_info = (unsigned long) &smsc95xx_info, + }, + { + /* SMSC9512/9514 USB Hub & Ethernet Device (Alternate ID) */ + USB_DEVICE(0x0424, 0x9909), + .driver_info = (unsigned long) &smsc95xx_info, + }, { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 24b36f79515..ca5ca5ae061 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1049,7 +1049,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, * NOTE: strictly conforming cdc-ether devices should expect * the ZLP here, but ignore the one-byte packet. */ - if ((length % dev->maxpacket) == 0) { + if (!(info->flags & FLAG_SEND_ZLP) && (length % dev->maxpacket) == 0) { urb->transfer_buffer_length++; if (skb_tailroom(skb)) { skb->data[skb->len] = 0; diff --git a/drivers/net/wireless/arlan-proc.c b/drivers/net/wireless/arlan-proc.c index 2ab1d59870f..a8b689635a3 100644 --- a/drivers/net/wireless/arlan-proc.c +++ b/drivers/net/wireless/arlan-proc.c @@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev) static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0"; -static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -629,7 +629,7 @@ final: *lenp = pos; if (!write) - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); else { *lenp = 0; @@ -639,7 +639,7 @@ final: } -static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info161719(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp SARLBNpln(u_char, txBuffer, 0x800); final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp SARLBNpln(u_char, rxBuffer, 0x800); final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } -static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_info18(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int i; @@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, final: *lenp = pos; - retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); + retv = proc_dostring(ctl, write, buffer, lenp, ppos); return retv; } @@ -766,7 +766,7 @@ final: static char conf_reset_result[200]; -static int arlan_configure(ctl_table * ctl, int write, struct file *filp, +static int arlan_configure(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int pos = 0; @@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp, return -1; *lenp = pos; - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); } -static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, +static int arlan_sysctl_reset(ctl_table * ctl, int write, void __user *buffer, size_t * lenp, loff_t *ppos) { int pos = 0; @@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, } else return -1; *lenp = pos + 3; - return proc_dostring(ctl, write, filp, buffer, lenp, ppos); + return proc_dostring(ctl, write, buffer, lenp, ppos); } diff --git a/drivers/net/wireless/ath/ar9170/usb.c b/drivers/net/wireless/ath/ar9170/usb.c index e0138ac8bf5..e974e5829e1 100644 --- a/drivers/net/wireless/ath/ar9170/usb.c +++ b/drivers/net/wireless/ath/ar9170/usb.c @@ -64,6 +64,8 @@ static struct usb_device_id ar9170_usb_ids[] = { { USB_DEVICE(0x0cf3, 0x9170) }, /* Atheros TG121N */ { USB_DEVICE(0x0cf3, 0x1001) }, + /* TP-Link TL-WN821N v2 */ + { USB_DEVICE(0x0cf3, 0x1002) }, /* Cace Airpcap NX */ { USB_DEVICE(0xcace, 0x0300) }, /* D-Link DWA 160A */ diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c index 3234995e888..0ad6d0b76e9 100644 --- a/drivers/net/wireless/ath/ath9k/calib.c +++ b/drivers/net/wireless/ath/ath9k/calib.c @@ -609,14 +609,24 @@ void ath9k_hw_loadnf(struct ath_hw *ah, struct ath9k_channel *chan) AR_PHY_CH1_EXT_CCA, AR_PHY_CH2_EXT_CCA }; - u8 chainmask; + u8 chainmask, rx_chain_status; + rx_chain_status = REG_READ(ah, AR_PHY_RX_CHAINMASK); if (AR_SREV_9285(ah)) chainmask = 0x9; - else if (AR_SREV_9280(ah) || AR_SREV_9287(ah)) - chainmask = 0x1B; - else - chainmask = 0x3F; + else if (AR_SREV_9280(ah) || AR_SREV_9287(ah)) { + if ((rx_chain_status & 0x2) || (rx_chain_status & 0x4)) + chainmask = 0x1B; + else + chainmask = 0x09; + } else { + if (rx_chain_status & 0x4) + chainmask = 0x3F; + else if (rx_chain_status & 0x2) + chainmask = 0x1B; + else + chainmask = 0x09; + } h = ah->nfCalHist; @@ -697,6 +707,8 @@ void ath9k_init_nfcal_hist_buffer(struct ath_hw *ah) noise_floor = AR_PHY_CCA_MAX_AR9280_GOOD_VALUE; else if (AR_SREV_9285(ah)) noise_floor = AR_PHY_CCA_MAX_AR9285_GOOD_VALUE; + else if (AR_SREV_9287(ah)) + noise_floor = AR_PHY_CCA_MAX_AR9287_GOOD_VALUE; else noise_floor = AR_PHY_CCA_MAX_AR5416_GOOD_VALUE; @@ -924,6 +936,7 @@ static inline void ath9k_hw_9285_pa_cal(struct ath_hw *ah, bool is_reset) regVal |= (1 << (19 + i)); REG_WRITE(ah, 0x7834, regVal); udelay(1); + regVal = REG_READ(ah, 0x7834); regVal &= (~(0x1 << (19 + i))); reg_field = MS(REG_READ(ah, 0x7840), AR9285_AN_RXTXBB1_SPARE9); regVal |= (reg_field << (19 + i)); diff --git a/drivers/net/wireless/ath/ath9k/calib.h b/drivers/net/wireless/ath/ath9k/calib.h index 019bcbba40e..9028ab193e4 100644 --- a/drivers/net/wireless/ath/ath9k/calib.h +++ b/drivers/net/wireless/ath/ath9k/calib.h @@ -28,6 +28,7 @@ extern const struct ath9k_percal_data adc_init_dc_cal; #define AR_PHY_CCA_MAX_AR5416_GOOD_VALUE -85 #define AR_PHY_CCA_MAX_AR9280_GOOD_VALUE -112 #define AR_PHY_CCA_MAX_AR9285_GOOD_VALUE -118 +#define AR_PHY_CCA_MAX_AR9287_GOOD_VALUE -118 #define AR_PHY_CCA_MAX_HIGH_VALUE -62 #define AR_PHY_CCA_MIN_BAD_VALUE -140 #define AR_PHY_CCA_FILTERWINDOW_LENGTH_INIT 3 diff --git a/drivers/net/wireless/ath/ath9k/eeprom_def.c b/drivers/net/wireless/ath/ath9k/eeprom_def.c index ae7fb5dcb26..4071fc91da0 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_def.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_def.c @@ -509,6 +509,8 @@ static void ath9k_hw_def_set_board_values(struct ath_hw *ah, REG_RMW_FIELD(ah, AR_AN_TOP1, AR_AN_TOP1_DACIPMODE, eep->baseEepHeader.dacLpMode); + udelay(100); + REG_RMW_FIELD(ah, AR_PHY_FRAME_CTL, AR_PHY_FRAME_CTL_TX_CLIP, pModal->miscBits >> 2); @@ -902,7 +904,7 @@ static void ath9k_hw_set_def_power_per_rate_table(struct ath_hw *ah, u16 powerLimit) { #define REDUCE_SCALED_POWER_BY_TWO_CHAIN 6 /* 10*log10(2)*2 */ -#define REDUCE_SCALED_POWER_BY_THREE_CHAIN 10 /* 10*log10(3)*2 */ +#define REDUCE_SCALED_POWER_BY_THREE_CHAIN 9 /* 10*log10(3)*2 */ struct ath_regulatory *regulatory = ath9k_hw_regulatory(ah); struct ar5416_eeprom_def *pEepData = &ah->eeprom.def; diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index b6c6cca0781..ca7694caf36 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -842,7 +842,7 @@ static void ath9k_hw_init_mode_regs(struct ath_hw *ah) static void ath9k_hw_init_mode_gain_regs(struct ath_hw *ah) { - if (AR_SREV_9287_11(ah)) + if (AR_SREV_9287_11_OR_LATER(ah)) INIT_INI_ARRAY(&ah->iniModesRxGain, ar9287Modes_rx_gain_9287_1_1, ARRAY_SIZE(ar9287Modes_rx_gain_9287_1_1), 6); @@ -853,7 +853,7 @@ static void ath9k_hw_init_mode_gain_regs(struct ath_hw *ah) else if (AR_SREV_9280_20(ah)) ath9k_hw_init_rxgain_ini(ah); - if (AR_SREV_9287_11(ah)) { + if (AR_SREV_9287_11_OR_LATER(ah)) { INIT_INI_ARRAY(&ah->iniModesTxGain, ar9287Modes_tx_gain_9287_1_1, ARRAY_SIZE(ar9287Modes_tx_gain_9287_1_1), 6); @@ -965,7 +965,7 @@ int ath9k_hw_init(struct ath_hw *ah) ath9k_hw_init_mode_regs(ah); if (ah->is_pciexpress) - ath9k_hw_configpcipowersave(ah, 0); + ath9k_hw_configpcipowersave(ah, 0, 0); else ath9k_hw_disablepcie(ah); @@ -1273,6 +1273,15 @@ static void ath9k_hw_override_ini(struct ath_hw *ah, */ REG_SET_BIT(ah, AR_DIAG_SW, (AR_DIAG_RX_DIS | AR_DIAG_RX_ABORT)); + if (AR_SREV_9280_10_OR_LATER(ah)) { + val = REG_READ(ah, AR_PCU_MISC_MODE2) & + (~AR_PCU_MISC_MODE2_HWWAR1); + + if (AR_SREV_9287_10_OR_LATER(ah)) + val = val & (~AR_PCU_MISC_MODE2_HWWAR2); + + REG_WRITE(ah, AR_PCU_MISC_MODE2, val); + } if (!AR_SREV_5416_20_OR_LATER(ah) || AR_SREV_9280_10_OR_LATER(ah)) @@ -1784,7 +1793,7 @@ static void ath9k_hw_set_regs(struct ath_hw *ah, struct ath9k_channel *chan, static bool ath9k_hw_chip_reset(struct ath_hw *ah, struct ath9k_channel *chan) { - if (OLC_FOR_AR9280_20_LATER) { + if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL)) { if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_POWER_ON)) return false; } else if (!ath9k_hw_set_reset_reg(ah, ATH9K_RESET_WARM)) @@ -2338,6 +2347,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, struct ath9k_channel *curchan = ah->curchan; u32 saveDefAntenna; u32 macStaId1; + u64 tsf = 0; int i, rx_chainmask, r; ah->extprotspacing = sc->ht_extprotspacing; @@ -2347,7 +2357,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, if (!ath9k_hw_setpower(ah, ATH9K_PM_AWAKE)) return -EIO; - if (curchan) + if (curchan && !ah->chip_fullsleep) ath9k_hw_getnf(ah, curchan); if (bChannelChange && @@ -2356,8 +2366,8 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, (chan->channel != ah->curchan->channel) && ((chan->channelFlags & CHANNEL_ALL) == (ah->curchan->channelFlags & CHANNEL_ALL)) && - (!AR_SREV_9280(ah) || (!IS_CHAN_A_5MHZ_SPACED(chan) && - !IS_CHAN_A_5MHZ_SPACED(ah->curchan)))) { + !(AR_SREV_9280(ah) || IS_CHAN_A_5MHZ_SPACED(chan) || + IS_CHAN_A_5MHZ_SPACED(ah->curchan))) { if (ath9k_hw_channel_change(ah, chan, sc->tx_chan_width)) { ath9k_hw_loadnf(ah, ah->curchan); @@ -2372,6 +2382,10 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, macStaId1 = REG_READ(ah, AR_STA_ID1) & AR_STA_ID1_BASE_RATE_11B; + /* For chips on which RTC reset is done, save TSF before it gets cleared */ + if (AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL)) + tsf = ath9k_hw_gettsf64(ah); + saveLedState = REG_READ(ah, AR_CFG_LED) & (AR_CFG_LED_ASSOC_CTL | AR_CFG_LED_MODE_SEL | AR_CFG_LED_BLINK_THRESH_SEL | AR_CFG_LED_BLINK_SLOW); @@ -2398,6 +2412,10 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, udelay(50); } + /* Restore TSF */ + if (tsf && AR_SREV_9280(ah) && ah->eep_ops->get_eeprom(ah, EEP_OL_PWRCTRL)) + ath9k_hw_settsf64(ah, tsf); + if (AR_SREV_9280_10_OR_LATER(ah)) REG_SET_BIT(ah, AR_GPIO_INPUT_EN_VAL, AR_GPIO_JTAG_DISABLE); @@ -3005,9 +3023,10 @@ void ath9k_ps_restore(struct ath_softc *sc) * Programming the SerDes must go through the same 288 bit serial shift * register as the other analog registers. Hence the 9 writes. */ -void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore) +void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off) { u8 i; + u32 val; if (ah->is_pciexpress != true) return; @@ -3017,84 +3036,113 @@ void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore) return; /* Nothing to do on restore for 11N */ - if (restore) - return; + if (!restore) { + if (AR_SREV_9280_20_OR_LATER(ah)) { + /* + * AR9280 2.0 or later chips use SerDes values from the + * initvals.h initialized depending on chipset during + * ath9k_hw_init() + */ + for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) { + REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0), + INI_RA(&ah->iniPcieSerdes, i, 1)); + } + } else if (AR_SREV_9280(ah) && + (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) { + REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00); + REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924); + + /* RX shut off when elecidle is asserted */ + REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019); + REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820); + REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560); + + /* Shut off CLKREQ active in L1 */ + if (ah->config.pcie_clock_req) + REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc); + else + REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd); - if (AR_SREV_9280_20_OR_LATER(ah)) { - /* - * AR9280 2.0 or later chips use SerDes values from the - * initvals.h initialized depending on chipset during - * ath9k_hw_init() - */ - for (i = 0; i < ah->iniPcieSerdes.ia_rows; i++) { - REG_WRITE(ah, INI_RA(&ah->iniPcieSerdes, i, 0), - INI_RA(&ah->iniPcieSerdes, i, 1)); - } - } else if (AR_SREV_9280(ah) && - (ah->hw_version.macRev == AR_SREV_REVISION_9280_10)) { - REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fd00); - REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924); + REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40); + REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554); + REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007); - /* RX shut off when elecidle is asserted */ - REG_WRITE(ah, AR_PCIE_SERDES, 0xa8000019); - REG_WRITE(ah, AR_PCIE_SERDES, 0x13160820); - REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980560); + /* Load the new settings */ + REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000); - /* Shut off CLKREQ active in L1 */ - if (ah->config.pcie_clock_req) - REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffc); - else - REG_WRITE(ah, AR_PCIE_SERDES, 0x401deffd); - - REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40); - REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554); - REG_WRITE(ah, AR_PCIE_SERDES, 0x00043007); + } else { + REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00); + REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924); - /* Load the new settings */ - REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000); + /* RX shut off when elecidle is asserted */ + REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039); + REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824); + REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579); - } else { - REG_WRITE(ah, AR_PCIE_SERDES, 0x9248fc00); - REG_WRITE(ah, AR_PCIE_SERDES, 0x24924924); + /* + * Ignore ah->ah_config.pcie_clock_req setting for + * pre-AR9280 11n + */ + REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff); - /* RX shut off when elecidle is asserted */ - REG_WRITE(ah, AR_PCIE_SERDES, 0x28000039); - REG_WRITE(ah, AR_PCIE_SERDES, 0x53160824); - REG_WRITE(ah, AR_PCIE_SERDES, 0xe5980579); + REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40); + REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554); + REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007); - /* - * Ignore ah->ah_config.pcie_clock_req setting for - * pre-AR9280 11n - */ - REG_WRITE(ah, AR_PCIE_SERDES, 0x001defff); + /* Load the new settings */ + REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000); + } - REG_WRITE(ah, AR_PCIE_SERDES, 0x1aaabe40); - REG_WRITE(ah, AR_PCIE_SERDES, 0xbe105554); - REG_WRITE(ah, AR_PCIE_SERDES, 0x000e3007); + udelay(1000); - /* Load the new settings */ - REG_WRITE(ah, AR_PCIE_SERDES2, 0x00000000); - } + /* set bit 19 to allow forcing of pcie core into L1 state */ + REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA); - udelay(1000); + /* Several PCIe massages to ensure proper behaviour */ + if (ah->config.pcie_waen) { + val = ah->config.pcie_waen; + if (!power_off) + val &= (~AR_WA_D3_L1_DISABLE); + } else { + if (AR_SREV_9285(ah) || AR_SREV_9271(ah) || + AR_SREV_9287(ah)) { + val = AR9285_WA_DEFAULT; + if (!power_off) + val &= (~AR_WA_D3_L1_DISABLE); + } else if (AR_SREV_9280(ah)) { + /* + * On AR9280 chips bit 22 of 0x4004 needs to be + * set otherwise card may disappear. + */ + val = AR9280_WA_DEFAULT; + if (!power_off) + val &= (~AR_WA_D3_L1_DISABLE); + } else + val = AR_WA_DEFAULT; + } - /* set bit 19 to allow forcing of pcie core into L1 state */ - REG_SET_BIT(ah, AR_PCIE_PM_CTRL, AR_PCIE_PM_CTRL_ENA); + REG_WRITE(ah, AR_WA, val); + } - /* Several PCIe massages to ensure proper behaviour */ - if (ah->config.pcie_waen) { - REG_WRITE(ah, AR_WA, ah->config.pcie_waen); - } else { - if (AR_SREV_9285(ah) || AR_SREV_9271(ah) || AR_SREV_9287(ah)) - REG_WRITE(ah, AR_WA, AR9285_WA_DEFAULT); + if (power_off) { /* - * On AR9280 chips bit 22 of 0x4004 needs to be set to - * otherwise card may disappear. + * Set PCIe workaround bits + * bit 14 in WA register (disable L1) should only + * be set when device enters D3 and be cleared + * when device comes back to D0. */ - else if (AR_SREV_9280(ah)) - REG_WRITE(ah, AR_WA, AR9280_WA_DEFAULT); - else - REG_WRITE(ah, AR_WA, AR_WA_DEFAULT); + if (ah->config.pcie_waen) { + if (ah->config.pcie_waen & AR_WA_D3_L1_DISABLE) + REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE); + } else { + if (((AR_SREV_9285(ah) || AR_SREV_9271(ah) || + AR_SREV_9287(ah)) && + (AR9285_WA_DEFAULT & AR_WA_D3_L1_DISABLE)) || + (AR_SREV_9280(ah) && + (AR9280_WA_DEFAULT & AR_WA_D3_L1_DISABLE))) { + REG_SET_BIT(ah, AR_WA, AR_WA_D3_L1_DISABLE); + } + } } } @@ -3652,15 +3700,7 @@ void ath9k_hw_fill_cap_info(struct ath_hw *ah) } #endif - if ((ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCI) || - (ah->hw_version.macVersion == AR_SREV_VERSION_5416_PCIE) || - (ah->hw_version.macVersion == AR_SREV_VERSION_9160) || - (ah->hw_version.macVersion == AR_SREV_VERSION_9100) || - (ah->hw_version.macVersion == AR_SREV_VERSION_9280) || - (ah->hw_version.macVersion == AR_SREV_VERSION_9285)) - pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP; - else - pCap->hw_caps |= ATH9K_HW_CAP_AUTOSLEEP; + pCap->hw_caps &= ~ATH9K_HW_CAP_AUTOSLEEP; if (AR_SREV_9280(ah) || AR_SREV_9285(ah)) pCap->hw_caps &= ~ATH9K_HW_CAP_4KB_SPLITTRANS; diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 9106a0b537d..b8923457182 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -106,7 +106,7 @@ #define AH_TSF_WRITE_TIMEOUT 100 /* (us) */ #define AH_TIME_QUANTUM 10 #define AR_KEYTABLE_SIZE 128 -#define POWER_UP_TIME 200000 +#define POWER_UP_TIME 10000 #define SPUR_RSSI_THRESH 40 #define CAB_TIMEOUT_VAL 10 @@ -650,7 +650,7 @@ void ath9k_hw_set_sta_beacon_timers(struct ath_hw *ah, const struct ath9k_beacon_state *bs); bool ath9k_hw_setpower(struct ath_hw *ah, enum ath9k_power_mode mode); -void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore); +void ath9k_hw_configpcipowersave(struct ath_hw *ah, int restore, int power_off); /* Interrupt Handling */ bool ath9k_hw_intrpend(struct ath_hw *ah); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 3dc7b5a13e6..52bed89063d 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1131,7 +1131,7 @@ void ath_radio_enable(struct ath_softc *sc) int r; ath9k_ps_wakeup(sc); - ath9k_hw_configpcipowersave(ah, 0); + ath9k_hw_configpcipowersave(ah, 0, 0); if (!ah->curchan) ah->curchan = ath_get_curchannel(sc, sc->hw); @@ -1202,7 +1202,7 @@ void ath_radio_disable(struct ath_softc *sc) spin_unlock_bh(&sc->sc_resetlock); ath9k_hw_phy_disable(ah); - ath9k_hw_configpcipowersave(ah, 1); + ath9k_hw_configpcipowersave(ah, 1, 1); ath9k_ps_restore(sc); ath9k_hw_setpower(ah, ATH9K_PM_FULL_SLEEP); } @@ -1226,11 +1226,6 @@ static void ath9k_rfkill_poll_state(struct ieee80211_hw *hw) bool blocked = !!ath_is_rfkill_set(sc); wiphy_rfkill_set_hw_state(hw->wiphy, blocked); - - if (blocked) - ath_radio_disable(sc); - else - ath_radio_enable(sc); } static void ath_start_rfkill_poll(struct ath_softc *sc) @@ -1260,6 +1255,7 @@ void ath_detach(struct ath_softc *sc) DPRINTF(sc, ATH_DBG_CONFIG, "Detach ATH hw\n"); ath_deinit_leds(sc); + wiphy_rfkill_stop_polling(sc->hw->wiphy); for (i = 0; i < sc->num_sec_wiphy; i++) { struct ath_wiphy *aphy = sc->sec_wiphy[i]; @@ -1942,7 +1938,7 @@ static int ath9k_start(struct ieee80211_hw *hw) init_channel = ath_get_curchannel(sc, hw); /* Reset SERDES registers */ - ath9k_hw_configpcipowersave(sc->sc_ah, 0); + ath9k_hw_configpcipowersave(sc->sc_ah, 0, 0); /* * The basic interface to setting the hardware in a good @@ -2166,11 +2162,9 @@ static void ath9k_stop(struct ieee80211_hw *hw) } else sc->rx.rxlink = NULL; - wiphy_rfkill_stop_polling(sc->hw->wiphy); - /* disable HAL and put h/w to sleep */ ath9k_hw_disable(sc->sc_ah); - ath9k_hw_configpcipowersave(sc->sc_ah, 1); + ath9k_hw_configpcipowersave(sc->sc_ah, 1, 1); ath9k_hw_setpower(sc->sc_ah, ATH9K_PM_FULL_SLEEP); sc->sc_flags |= SC_OP_INVALID; diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h index e5c29eb86e8..d83b77f821e 100644 --- a/drivers/net/wireless/ath/ath9k/reg.h +++ b/drivers/net/wireless/ath/ath9k/reg.h @@ -676,8 +676,9 @@ #define AR_RC_HOSTIF 0x00000100 #define AR_WA 0x4004 +#define AR_WA_D3_L1_DISABLE (1 << 14) #define AR9285_WA_DEFAULT 0x004a05cb -#define AR9280_WA_DEFAULT 0x0040073f +#define AR9280_WA_DEFAULT 0x0040073b #define AR_WA_DEFAULT 0x0000073f diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index 83e38134acc..54ea61c15d8 100644 --- a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -61,11 +61,28 @@ config B43_PCMCIA If unsure, say N. +config B43_SDIO + bool "Broadcom 43xx SDIO device support (EXPERIMENTAL)" + depends on B43 && SSB_SDIOHOST_POSSIBLE && EXPERIMENTAL + select SSB_SDIOHOST + ---help--- + Broadcom 43xx device support for Soft-MAC SDIO devices. + + With this config option you can drive Soft-MAC b43 cards with a + Secure Digital I/O interface. + This includes the WLAN daughter card found on the Nintendo Wii + video game console. + Note that this does not support Broadcom 43xx Full-MAC devices. + + It's safe to select Y here, even if you don't have a B43 SDIO device. + + If unsure, say N. + # Data transfers to the device via PIO -# This is only needed on PCMCIA devices. All others can do DMA properly. +# This is only needed on PCMCIA and SDIO devices. All others can do DMA properly. config B43_PIO bool - depends on B43 && (B43_PCMCIA || B43_FORCE_PIO) + depends on B43 && (B43_SDIO || B43_PCMCIA || B43_FORCE_PIO) select SSB_BLOCKIO default y diff --git a/drivers/net/wireless/b43/Makefile b/drivers/net/wireless/b43/Makefile index da379f4b0c3..84772a2542d 100644 --- a/drivers/net/wireless/b43/Makefile +++ b/drivers/net/wireless/b43/Makefile @@ -16,6 +16,7 @@ b43-$(CONFIG_B43_PIO) += pio.o b43-y += rfkill.o b43-$(CONFIG_B43_LEDS) += leds.o b43-$(CONFIG_B43_PCMCIA) += pcmcia.o +b43-$(CONFIG_B43_SDIO) += sdio.o b43-$(CONFIG_B43_DEBUG) += debugfs.o obj-$(CONFIG_B43) += b43.o diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index 09cfe68537b..fa1549a03c7 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -629,13 +629,6 @@ struct b43_wl { * from the mac80211 subsystem. */ u16 mac80211_initially_registered_queues; - /* R/W lock for data transmission. - * Transmissions on 2+ queues can run concurrently, but somebody else - * might sync with TX by write_lock_irqsave()'ing. */ - rwlock_t tx_lock; - /* Lock for LEDs access. */ - spinlock_t leds_lock; - /* We can only have one operating interface (802.11 core) * at a time. General information about this interface follows. */ @@ -686,6 +679,9 @@ struct b43_wl { struct work_struct tx_work; /* Queue of packets to be transmitted. */ struct sk_buff_head tx_queue; + + /* The device LEDs. */ + struct b43_leds leds; }; /* The type of the firmware file. */ @@ -768,13 +764,10 @@ struct b43_wldev { /* The device initialization status. * Use b43_status() to query. */ atomic_t __init_status; - /* Saved init status for handling suspend. */ - int suspend_init_status; bool bad_frames_preempt; /* Use "Bad Frames Preemption" (default off) */ bool dfq_valid; /* Directed frame queue valid (IBSS PS mode, ATIM) */ bool radio_hw_enable; /* saved state of radio hardware enabled state */ - bool suspend_in_progress; /* TRUE, if we are in a suspend/resume cycle */ bool qos_enabled; /* TRUE, if QoS is used. */ bool hwcrypto_enabled; /* TRUE, if HW crypto acceleration is enabled. */ @@ -794,12 +787,6 @@ struct b43_wldev { /* Various statistics about the physical device. */ struct b43_stats stats; - /* The device LEDs. */ - struct b43_led led_tx; - struct b43_led led_rx; - struct b43_led led_assoc; - struct b43_led led_radio; - /* Reason code of the last interrupt. */ u32 irq_reason; u32 dma_reason[6]; @@ -830,6 +817,10 @@ struct b43_wldev { /* Debugging stuff follows. */ #ifdef CONFIG_B43_DEBUG struct b43_dfsentry *dfsentry; + unsigned int irq_count; + unsigned int irq_bit_count[32]; + unsigned int tx_count; + unsigned int rx_count; #endif }; diff --git a/drivers/net/wireless/b43/debugfs.c b/drivers/net/wireless/b43/debugfs.c index 8f64943e3f6..80b19a44a40 100644 --- a/drivers/net/wireless/b43/debugfs.c +++ b/drivers/net/wireless/b43/debugfs.c @@ -689,6 +689,7 @@ static void b43_add_dynamic_debug(struct b43_wldev *dev) add_dyn_dbg("debug_lo", B43_DBG_LO, 0); add_dyn_dbg("debug_firmware", B43_DBG_FIRMWARE, 0); add_dyn_dbg("debug_keys", B43_DBG_KEYS, 0); + add_dyn_dbg("debug_verbose_stats", B43_DBG_VERBOSESTATS, 0); #undef add_dyn_dbg } diff --git a/drivers/net/wireless/b43/debugfs.h b/drivers/net/wireless/b43/debugfs.h index e47b4b488b0..822aad8842f 100644 --- a/drivers/net/wireless/b43/debugfs.h +++ b/drivers/net/wireless/b43/debugfs.h @@ -13,6 +13,7 @@ enum b43_dyndbg { /* Dynamic debugging features */ B43_DBG_LO, B43_DBG_FIRMWARE, B43_DBG_KEYS, + B43_DBG_VERBOSESTATS, __B43_NR_DYNDBG, }; diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c index a467ee260a1..8701034569f 100644 --- a/drivers/net/wireless/b43/dma.c +++ b/drivers/net/wireless/b43/dma.c @@ -1428,9 +1428,9 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, ring->nr_failed_tx_packets++; ring->nr_total_packet_tries += status->frame_count; #endif /* DEBUG */ - ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb); + ieee80211_tx_status(dev->wl->hw, meta->skb); - /* skb is freed by ieee80211_tx_status_irqsafe() */ + /* skb is freed by ieee80211_tx_status() */ meta->skb = NULL; } else { /* No need to call free_descriptor_buffer here, as diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c index c8b317094c3..fbe3d4f62ce 100644 --- a/drivers/net/wireless/b43/leds.c +++ b/drivers/net/wireless/b43/leds.c @@ -34,57 +34,88 @@ static void b43_led_turn_on(struct b43_wldev *dev, u8 led_index, bool activelow) { - struct b43_wl *wl = dev->wl; - unsigned long flags; u16 ctl; - spin_lock_irqsave(&wl->leds_lock, flags); ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL); if (activelow) ctl &= ~(1 << led_index); else ctl |= (1 << led_index); b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl); - spin_unlock_irqrestore(&wl->leds_lock, flags); } static void b43_led_turn_off(struct b43_wldev *dev, u8 led_index, bool activelow) { - struct b43_wl *wl = dev->wl; - unsigned long flags; u16 ctl; - spin_lock_irqsave(&wl->leds_lock, flags); ctl = b43_read16(dev, B43_MMIO_GPIO_CONTROL); if (activelow) ctl |= (1 << led_index); else ctl &= ~(1 << led_index); b43_write16(dev, B43_MMIO_GPIO_CONTROL, ctl); - spin_unlock_irqrestore(&wl->leds_lock, flags); } -/* Callback from the LED subsystem. */ -static void b43_led_brightness_set(struct led_classdev *led_dev, - enum led_brightness brightness) +static void b43_led_update(struct b43_wldev *dev, + struct b43_led *led) { - struct b43_led *led = container_of(led_dev, struct b43_led, led_dev); - struct b43_wldev *dev = led->dev; bool radio_enabled; + bool turn_on; - if (unlikely(b43_status(dev) < B43_STAT_INITIALIZED)) + if (!led->wl) return; - /* Checking the radio-enabled status here is slightly racy, - * but we want to avoid the locking overhead and we don't care - * whether the LED has the wrong state for a second. */ radio_enabled = (dev->phy.radio_on && dev->radio_hw_enable); - if (brightness == LED_OFF || !radio_enabled) - b43_led_turn_off(dev, led->index, led->activelow); + /* The led->state read is racy, but we don't care. In case we raced + * with the brightness_set handler, we will be called again soon + * to fixup our state. */ + if (radio_enabled) + turn_on = atomic_read(&led->state) != LED_OFF; else + turn_on = 0; + if (turn_on == led->hw_state) + return; + led->hw_state = turn_on; + + if (turn_on) b43_led_turn_on(dev, led->index, led->activelow); + else + b43_led_turn_off(dev, led->index, led->activelow); +} + +static void b43_leds_work(struct work_struct *work) +{ + struct b43_leds *leds = container_of(work, struct b43_leds, work); + struct b43_wl *wl = container_of(leds, struct b43_wl, leds); + struct b43_wldev *dev; + + mutex_lock(&wl->mutex); + dev = wl->current_dev; + if (unlikely(!dev || b43_status(dev) < B43_STAT_STARTED)) + goto out_unlock; + + b43_led_update(dev, &wl->leds.led_tx); + b43_led_update(dev, &wl->leds.led_rx); + b43_led_update(dev, &wl->leds.led_radio); + b43_led_update(dev, &wl->leds.led_assoc); + +out_unlock: + mutex_unlock(&wl->mutex); +} + +/* Callback from the LED subsystem. */ +static void b43_led_brightness_set(struct led_classdev *led_dev, + enum led_brightness brightness) +{ + struct b43_led *led = container_of(led_dev, struct b43_led, led_dev); + struct b43_wl *wl = led->wl; + + if (likely(!wl->leds.stop)) { + atomic_set(&led->state, brightness); + ieee80211_queue_work(wl->hw, &wl->leds.work); + } } static int b43_register_led(struct b43_wldev *dev, struct b43_led *led, @@ -93,15 +124,15 @@ static int b43_register_led(struct b43_wldev *dev, struct b43_led *led, { int err; - b43_led_turn_off(dev, led_index, activelow); - if (led->dev) + if (led->wl) return -EEXIST; if (!default_trigger) return -EINVAL; - led->dev = dev; + led->wl = dev->wl; led->index = led_index; led->activelow = activelow; strncpy(led->name, name, sizeof(led->name)); + atomic_set(&led->state, 0); led->led_dev.name = led->name; led->led_dev.default_trigger = default_trigger; @@ -110,19 +141,19 @@ static int b43_register_led(struct b43_wldev *dev, struct b43_led *led, err = led_classdev_register(dev->dev->dev, &led->led_dev); if (err) { b43warn(dev->wl, "LEDs: Failed to register %s\n", name); - led->dev = NULL; + led->wl = NULL; return err; } + return 0; } static void b43_unregister_led(struct b43_led *led) { - if (!led->dev) + if (!led->wl) return; led_classdev_unregister(&led->led_dev); - b43_led_turn_off(led->dev, led->index, led->activelow); - led->dev = NULL; + led->wl = NULL; } static void b43_map_led(struct b43_wldev *dev, @@ -137,24 +168,20 @@ static void b43_map_led(struct b43_wldev *dev, * generic LED triggers. */ switch (behaviour) { case B43_LED_INACTIVE: - break; case B43_LED_OFF: - b43_led_turn_off(dev, led_index, activelow); - break; case B43_LED_ON: - b43_led_turn_on(dev, led_index, activelow); break; case B43_LED_ACTIVITY: case B43_LED_TRANSFER: case B43_LED_APTRANSFER: snprintf(name, sizeof(name), "b43-%s::tx", wiphy_name(hw->wiphy)); - b43_register_led(dev, &dev->led_tx, name, + b43_register_led(dev, &dev->wl->leds.led_tx, name, ieee80211_get_tx_led_name(hw), led_index, activelow); snprintf(name, sizeof(name), "b43-%s::rx", wiphy_name(hw->wiphy)); - b43_register_led(dev, &dev->led_rx, name, + b43_register_led(dev, &dev->wl->leds.led_rx, name, ieee80211_get_rx_led_name(hw), led_index, activelow); break; @@ -164,18 +191,15 @@ static void b43_map_led(struct b43_wldev *dev, case B43_LED_MODE_BG: snprintf(name, sizeof(name), "b43-%s::radio", wiphy_name(hw->wiphy)); - b43_register_led(dev, &dev->led_radio, name, + b43_register_led(dev, &dev->wl->leds.led_radio, name, ieee80211_get_radio_led_name(hw), led_index, activelow); - /* Sync the RF-kill LED state with radio and switch states. */ - if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev)) - b43_led_turn_on(dev, led_index, activelow); break; case B43_LED_WEIRD: case B43_LED_ASSOC: snprintf(name, sizeof(name), "b43-%s::assoc", wiphy_name(hw->wiphy)); - b43_register_led(dev, &dev->led_assoc, name, + b43_register_led(dev, &dev->wl->leds.led_assoc, name, ieee80211_get_assoc_led_name(hw), led_index, activelow); break; @@ -186,58 +210,150 @@ static void b43_map_led(struct b43_wldev *dev, } } -void b43_leds_init(struct b43_wldev *dev) +static void b43_led_get_sprominfo(struct b43_wldev *dev, + unsigned int led_index, + enum b43_led_behaviour *behaviour, + bool *activelow) { struct ssb_bus *bus = dev->dev->bus; u8 sprom[4]; - int i; - enum b43_led_behaviour behaviour; - bool activelow; sprom[0] = bus->sprom.gpio0; sprom[1] = bus->sprom.gpio1; sprom[2] = bus->sprom.gpio2; sprom[3] = bus->sprom.gpio3; - for (i = 0; i < 4; i++) { - if (sprom[i] == 0xFF) { - /* There is no LED information in the SPROM - * for this LED. Hardcode it here. */ - activelow = 0; - switch (i) { - case 0: - behaviour = B43_LED_ACTIVITY; - activelow = 1; - if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ) - behaviour = B43_LED_RADIO_ALL; - break; - case 1: - behaviour = B43_LED_RADIO_B; - if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK) - behaviour = B43_LED_ASSOC; - break; - case 2: - behaviour = B43_LED_RADIO_A; - break; - case 3: - behaviour = B43_LED_OFF; - break; - default: - B43_WARN_ON(1); - return; - } + if (sprom[led_index] == 0xFF) { + /* There is no LED information in the SPROM + * for this LED. Hardcode it here. */ + *activelow = 0; + switch (led_index) { + case 0: + *behaviour = B43_LED_ACTIVITY; + *activelow = 1; + if (bus->boardinfo.vendor == PCI_VENDOR_ID_COMPAQ) + *behaviour = B43_LED_RADIO_ALL; + break; + case 1: + *behaviour = B43_LED_RADIO_B; + if (bus->boardinfo.vendor == PCI_VENDOR_ID_ASUSTEK) + *behaviour = B43_LED_ASSOC; + break; + case 2: + *behaviour = B43_LED_RADIO_A; + break; + case 3: + *behaviour = B43_LED_OFF; + break; + default: + B43_WARN_ON(1); + return; + } + } else { + *behaviour = sprom[led_index] & B43_LED_BEHAVIOUR; + *activelow = !!(sprom[led_index] & B43_LED_ACTIVELOW); + } +} + +void b43_leds_init(struct b43_wldev *dev) +{ + struct b43_led *led; + unsigned int i; + enum b43_led_behaviour behaviour; + bool activelow; + + /* Sync the RF-kill LED state (if we have one) with radio and switch states. */ + led = &dev->wl->leds.led_radio; + if (led->wl) { + if (dev->phy.radio_on && b43_is_hw_radio_enabled(dev)) { + b43_led_turn_on(dev, led->index, led->activelow); + led->hw_state = 1; + atomic_set(&led->state, 1); } else { - behaviour = sprom[i] & B43_LED_BEHAVIOUR; - activelow = !!(sprom[i] & B43_LED_ACTIVELOW); + b43_led_turn_off(dev, led->index, led->activelow); + led->hw_state = 0; + atomic_set(&led->state, 0); } - b43_map_led(dev, i, behaviour, activelow); } + + /* Initialize TX/RX/ASSOC leds */ + led = &dev->wl->leds.led_tx; + if (led->wl) { + b43_led_turn_off(dev, led->index, led->activelow); + led->hw_state = 0; + atomic_set(&led->state, 0); + } + led = &dev->wl->leds.led_rx; + if (led->wl) { + b43_led_turn_off(dev, led->index, led->activelow); + led->hw_state = 0; + atomic_set(&led->state, 0); + } + led = &dev->wl->leds.led_assoc; + if (led->wl) { + b43_led_turn_off(dev, led->index, led->activelow); + led->hw_state = 0; + atomic_set(&led->state, 0); + } + + /* Initialize other LED states. */ + for (i = 0; i < B43_MAX_NR_LEDS; i++) { + b43_led_get_sprominfo(dev, i, &behaviour, &activelow); + switch (behaviour) { + case B43_LED_OFF: + b43_led_turn_off(dev, i, activelow); + break; + case B43_LED_ON: + b43_led_turn_on(dev, i, activelow); + break; + default: + /* Leave others as-is. */ + break; + } + } + + dev->wl->leds.stop = 0; } void b43_leds_exit(struct b43_wldev *dev) { - b43_unregister_led(&dev->led_tx); - b43_unregister_led(&dev->led_rx); - b43_unregister_led(&dev->led_assoc); - b43_unregister_led(&dev->led_radio); + struct b43_leds *leds = &dev->wl->leds; + + b43_led_turn_off(dev, leds->led_tx.index, leds->led_tx.activelow); + b43_led_turn_off(dev, leds->led_rx.index, leds->led_rx.activelow); + b43_led_turn_off(dev, leds->led_assoc.index, leds->led_assoc.activelow); + b43_led_turn_off(dev, leds->led_radio.index, leds->led_radio.activelow); +} + +void b43_leds_stop(struct b43_wldev *dev) +{ + struct b43_leds *leds = &dev->wl->leds; + + leds->stop = 1; + cancel_work_sync(&leds->work); +} + +void b43_leds_register(struct b43_wldev *dev) +{ + unsigned int i; + enum b43_led_behaviour behaviour; + bool activelow; + + INIT_WORK(&dev->wl->leds.work, b43_leds_work); + + /* Register the LEDs to the LED subsystem. */ + for (i = 0; i < B43_MAX_NR_LEDS; i++) { + b43_led_get_sprominfo(dev, i, &behaviour, &activelow); + b43_map_led(dev, i, behaviour, activelow); + } +} + +void b43_leds_unregister(struct b43_wldev *dev) +{ + struct b43_leds *leds = &dev->wl->leds; + + b43_unregister_led(&leds->led_tx); + b43_unregister_led(&leds->led_rx); + b43_unregister_led(&leds->led_assoc); + b43_unregister_led(&leds->led_radio); } diff --git a/drivers/net/wireless/b43/leds.h b/drivers/net/wireless/b43/leds.h index b8b1dd52124..9592e4c5a5f 100644 --- a/drivers/net/wireless/b43/leds.h +++ b/drivers/net/wireless/b43/leds.h @@ -7,12 +7,13 @@ struct b43_wldev; #include <linux/types.h> #include <linux/leds.h> +#include <linux/workqueue.h> #define B43_LED_MAX_NAME_LEN 31 struct b43_led { - struct b43_wldev *dev; + struct b43_wl *wl; /* The LED class device */ struct led_classdev led_dev; /* The index number of the LED. */ @@ -22,8 +23,24 @@ struct b43_led { bool activelow; /* The unique name string for this LED device. */ char name[B43_LED_MAX_NAME_LEN + 1]; + /* The current status of the LED. This is updated locklessly. */ + atomic_t state; + /* The active state in hardware. */ + bool hw_state; }; +struct b43_leds { + struct b43_led led_tx; + struct b43_led led_rx; + struct b43_led led_radio; + struct b43_led led_assoc; + + bool stop; + struct work_struct work; +}; + +#define B43_MAX_NR_LEDS 4 + #define B43_LED_BEHAVIOUR 0x7F #define B43_LED_ACTIVELOW 0x80 /* LED behaviour values */ @@ -42,23 +59,35 @@ enum b43_led_behaviour { B43_LED_INACTIVE, }; +void b43_leds_register(struct b43_wldev *dev); +void b43_leds_unregister(struct b43_wldev *dev); void b43_leds_init(struct b43_wldev *dev); void b43_leds_exit(struct b43_wldev *dev); +void b43_leds_stop(struct b43_wldev *dev); #else /* CONFIG_B43_LEDS */ /* LED support disabled */ -struct b43_led { +struct b43_leds { /* empty */ }; +static inline void b43_leds_register(struct b43_wldev *dev) +{ +} +static inline void b43_leds_unregister(struct b43_wldev *dev) +{ +} static inline void b43_leds_init(struct b43_wldev *dev) { } static inline void b43_leds_exit(struct b43_wldev *dev) { } +static inline void b43_leds_stop(struct b43_wldev *dev) +{ +} #endif /* CONFIG_B43_LEDS */ #endif /* B43_LEDS_H_ */ diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index e789792a36b..9b907a36bb8 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -8,6 +8,9 @@ Copyright (c) 2005 Danny van Dyk <kugelfang@gentoo.org> Copyright (c) 2005 Andreas Jaggi <andreas.jaggi@waterwave.ch> + SDIO support + Copyright (c) 2009 Albert Herranz <albert_herranz@yahoo.es> + Some parts of the code in this file are derived from the ipw2200 driver Copyright(c) 2003 - 2004 Intel Corporation. @@ -53,6 +56,8 @@ #include "xmit.h" #include "lo.h" #include "pcmcia.h" +#include "sdio.h" +#include <linux/mmc/sdio_func.h> MODULE_DESCRIPTION("Broadcom B43 wireless driver"); MODULE_AUTHOR("Martin Langer"); @@ -1587,7 +1592,7 @@ static void b43_beacon_update_trigger_work(struct work_struct *work) mutex_lock(&wl->mutex); dev = wl->current_dev; if (likely(dev && (b43_status(dev) >= B43_STAT_INITIALIZED))) { - if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) { + if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) { /* wl->mutex is enough. */ b43_do_beacon_update_trigger_work(dev); mmiowb(); @@ -1825,6 +1830,16 @@ static void b43_do_interrupt_thread(struct b43_wldev *dev) /* Re-enable interrupts on the device by restoring the current interrupt mask. */ b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, dev->irq_mask); + +#if B43_DEBUG + if (b43_debug(dev, B43_DBG_VERBOSESTATS)) { + dev->irq_count++; + for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) { + if (reason & (1 << i)) + dev->irq_bit_count[i]++; + } + } +#endif } /* Interrupt thread handler. Handles device interrupts in thread context. */ @@ -1905,6 +1920,21 @@ static irqreturn_t b43_interrupt_handler(int irq, void *dev_id) return ret; } +/* SDIO interrupt handler. This runs in process context. */ +static void b43_sdio_interrupt_handler(struct b43_wldev *dev) +{ + struct b43_wl *wl = dev->wl; + irqreturn_t ret; + + mutex_lock(&wl->mutex); + + ret = b43_do_interrupt(dev); + if (ret == IRQ_WAKE_THREAD) + b43_do_interrupt_thread(dev); + + mutex_unlock(&wl->mutex); +} + void b43_do_release_fw(struct b43_firmware_file *fw) { release_firmware(fw->data); @@ -2645,6 +2675,20 @@ static void b43_adjust_opmode(struct b43_wldev *dev) cfp_pretbtt = 50; } b43_write16(dev, 0x612, cfp_pretbtt); + + /* FIXME: We don't currently implement the PMQ mechanism, + * so always disable it. If we want to implement PMQ, + * we need to enable it here (clear DISCPMQ) in AP mode. + */ + if (0 /* ctl & B43_MACCTL_AP */) { + b43_write32(dev, B43_MMIO_MACCTL, + b43_read32(dev, B43_MMIO_MACCTL) + & ~B43_MACCTL_DISCPMQ); + } else { + b43_write32(dev, B43_MMIO_MACCTL, + b43_read32(dev, B43_MMIO_MACCTL) + | B43_MACCTL_DISCPMQ); + } } static void b43_rate_memory_write(struct b43_wldev *dev, u16 rate, int is_ofdm) @@ -2873,6 +2917,27 @@ static void b43_periodic_every15sec(struct b43_wldev *dev) atomic_set(&phy->txerr_cnt, B43_PHY_TX_BADNESS_LIMIT); wmb(); + +#if B43_DEBUG + if (b43_debug(dev, B43_DBG_VERBOSESTATS)) { + unsigned int i; + + b43dbg(dev->wl, "Stats: %7u IRQs/sec, %7u TX/sec, %7u RX/sec\n", + dev->irq_count / 15, + dev->tx_count / 15, + dev->rx_count / 15); + dev->irq_count = 0; + dev->tx_count = 0; + dev->rx_count = 0; + for (i = 0; i < ARRAY_SIZE(dev->irq_bit_count); i++) { + if (dev->irq_bit_count[i]) { + b43dbg(dev->wl, "Stats: %7u IRQ-%02u/sec (0x%08X)\n", + dev->irq_bit_count[i] / 15, i, (1 << i)); + dev->irq_bit_count[i] = 0; + } + } + } +#endif } static void do_periodic_work(struct b43_wldev *dev) @@ -3002,14 +3067,18 @@ static void b43_security_init(struct b43_wldev *dev) static int b43_rng_read(struct hwrng *rng, u32 *data) { struct b43_wl *wl = (struct b43_wl *)rng->priv; + struct b43_wldev *dev; + int count = -ENODEV; - /* FIXME: We need to take wl->mutex here to make sure the device - * is not going away from under our ass. However it could deadlock - * with hwrng internal locking. */ - - *data = b43_read16(wl->current_dev, B43_MMIO_RNG); + mutex_lock(&wl->mutex); + dev = wl->current_dev; + if (likely(dev && b43_status(dev) >= B43_STAT_INITIALIZED)) { + *data = b43_read16(dev, B43_MMIO_RNG); + count = sizeof(u16); + } + mutex_unlock(&wl->mutex); - return (sizeof(u16)); + return count; } #endif /* CONFIG_B43_HWRNG */ @@ -3068,6 +3137,9 @@ static void b43_tx_work(struct work_struct *work) dev_kfree_skb(skb); /* Drop it */ } +#if B43_DEBUG + dev->tx_count++; +#endif mutex_unlock(&wl->mutex); } @@ -3820,7 +3892,7 @@ redo: /* Disable interrupts on the device. */ b43_set_status(dev, B43_STAT_INITIALIZED); - if (0 /*FIXME dev->dev->bus->bustype == SSB_BUSTYPE_SDIO*/) { + if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) { /* wl->mutex is locked. That is enough. */ b43_write32(dev, B43_MMIO_GEN_IRQ_MASK, 0); b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */ @@ -3830,10 +3902,15 @@ redo: b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); /* Flush */ spin_unlock_irq(&wl->hardirq_lock); } - /* Synchronize the interrupt handlers. Unlock to avoid deadlocks. */ + /* Synchronize and free the interrupt handlers. Unlock to avoid deadlocks. */ orig_dev = dev; mutex_unlock(&wl->mutex); - synchronize_irq(dev->dev->irq); + if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) { + b43_sdio_free_irq(dev); + } else { + synchronize_irq(dev->dev->irq); + free_irq(dev->dev->irq, dev); + } mutex_lock(&wl->mutex); dev = wl->current_dev; if (!dev) @@ -3850,7 +3927,7 @@ redo: dev_kfree_skb(skb_dequeue(&wl->tx_queue)); b43_mac_suspend(dev); - free_irq(dev->dev->irq, dev); + b43_leds_exit(dev); b43dbg(wl, "Wireless interface stopped\n"); return dev; @@ -3864,12 +3941,20 @@ static int b43_wireless_core_start(struct b43_wldev *dev) B43_WARN_ON(b43_status(dev) != B43_STAT_INITIALIZED); drain_txstatus_queue(dev); - err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler, - b43_interrupt_thread_handler, - IRQF_SHARED, KBUILD_MODNAME, dev); - if (err) { - b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq); - goto out; + if (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) { + err = b43_sdio_request_irq(dev, b43_sdio_interrupt_handler); + if (err) { + b43err(dev->wl, "Cannot request SDIO IRQ\n"); + goto out; + } + } else { + err = request_threaded_irq(dev->dev->irq, b43_interrupt_handler, + b43_interrupt_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + if (err) { + b43err(dev->wl, "Cannot request IRQ-%d\n", dev->dev->irq); + goto out; + } } /* We are ready to run. */ @@ -3882,8 +3967,10 @@ static int b43_wireless_core_start(struct b43_wldev *dev) /* Start maintainance work */ b43_periodic_tasks_setup(dev); + b43_leds_init(dev); + b43dbg(dev->wl, "Wireless interface started\n"); - out: +out: return err; } @@ -4160,10 +4247,6 @@ static void b43_wireless_core_exit(struct b43_wldev *dev) macctl |= B43_MACCTL_PSM_JMP0; b43_write32(dev, B43_MMIO_MACCTL, macctl); - if (!dev->suspend_in_progress) { - b43_leds_exit(dev); - b43_rng_exit(dev->wl); - } b43_dma_free(dev); b43_pio_free(dev); b43_chip_exit(dev); @@ -4180,7 +4263,6 @@ static void b43_wireless_core_exit(struct b43_wldev *dev) /* Initialize a wireless core */ static int b43_wireless_core_init(struct b43_wldev *dev) { - struct b43_wl *wl = dev->wl; struct ssb_bus *bus = dev->dev->bus; struct ssb_sprom *sprom = &bus->sprom; struct b43_phy *phy = &dev->phy; @@ -4264,7 +4346,9 @@ static int b43_wireless_core_init(struct b43_wldev *dev) /* Maximum Contention Window */ b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_MAXCONT, 0x3FF); - if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) || B43_FORCE_PIO) { + if ((dev->dev->bus->bustype == SSB_BUSTYPE_PCMCIA) || + (dev->dev->bus->bustype == SSB_BUSTYPE_SDIO) || + B43_FORCE_PIO) { dev->__using_pio_transfers = 1; err = b43_pio_init(dev); } else { @@ -4280,15 +4364,13 @@ static int b43_wireless_core_init(struct b43_wldev *dev) ssb_bus_powerup(bus, !(sprom->boardflags_lo & B43_BFL_XTAL_NOSLOW)); b43_upload_card_macaddress(dev); b43_security_init(dev); - if (!dev->suspend_in_progress) - b43_rng_init(wl); + + ieee80211_wake_queues(dev->wl->hw); ieee80211_wake_queues(dev->wl->hw); b43_set_status(dev, B43_STAT_INITIALIZED); - if (!dev->suspend_in_progress) - b43_leds_init(dev); out: return err; @@ -4837,7 +4919,6 @@ static int b43_wireless_init(struct ssb_device *dev) /* Initialize struct b43_wl */ wl->hw = hw; - spin_lock_init(&wl->leds_lock); mutex_init(&wl->mutex); spin_lock_init(&wl->hardirq_lock); INIT_LIST_HEAD(&wl->devlist); @@ -4878,6 +4959,8 @@ static int b43_probe(struct ssb_device *dev, const struct ssb_device_id *id) err = ieee80211_register_hw(wl->hw); if (err) goto err_one_core_detach; + b43_leds_register(wl->current_dev); + b43_rng_init(wl); } out: @@ -4906,12 +4989,15 @@ static void b43_remove(struct ssb_device *dev) * might have modified it. Restoring is important, so the networking * stack can properly free resources. */ wl->hw->queues = wl->mac80211_initially_registered_queues; + b43_leds_stop(wldev); ieee80211_unregister_hw(wl->hw); } b43_one_core_detach(dev); if (list_empty(&wl->devlist)) { + b43_rng_exit(wl); + b43_leds_unregister(wldev); /* Last core on the chip unregistered. * We can destroy common struct b43_wl. */ @@ -4929,80 +5015,17 @@ void b43_controller_restart(struct b43_wldev *dev, const char *reason) ieee80211_queue_work(dev->wl->hw, &dev->restart_work); } -#ifdef CONFIG_PM - -static int b43_suspend(struct ssb_device *dev, pm_message_t state) -{ - struct b43_wldev *wldev = ssb_get_drvdata(dev); - struct b43_wl *wl = wldev->wl; - - b43dbg(wl, "Suspending...\n"); - - mutex_lock(&wl->mutex); - wldev->suspend_in_progress = true; - wldev->suspend_init_status = b43_status(wldev); - if (wldev->suspend_init_status >= B43_STAT_STARTED) - wldev = b43_wireless_core_stop(wldev); - if (wldev && wldev->suspend_init_status >= B43_STAT_INITIALIZED) - b43_wireless_core_exit(wldev); - mutex_unlock(&wl->mutex); - - b43dbg(wl, "Device suspended.\n"); - - return 0; -} - -static int b43_resume(struct ssb_device *dev) -{ - struct b43_wldev *wldev = ssb_get_drvdata(dev); - struct b43_wl *wl = wldev->wl; - int err = 0; - - b43dbg(wl, "Resuming...\n"); - - mutex_lock(&wl->mutex); - if (wldev->suspend_init_status >= B43_STAT_INITIALIZED) { - err = b43_wireless_core_init(wldev); - if (err) { - b43err(wl, "Resume failed at core init\n"); - goto out; - } - } - if (wldev->suspend_init_status >= B43_STAT_STARTED) { - err = b43_wireless_core_start(wldev); - if (err) { - b43_leds_exit(wldev); - b43_rng_exit(wldev->wl); - b43_wireless_core_exit(wldev); - b43err(wl, "Resume failed at core start\n"); - goto out; - } - } - b43dbg(wl, "Device resumed.\n"); - out: - wldev->suspend_in_progress = false; - mutex_unlock(&wl->mutex); - return err; -} - -#else /* CONFIG_PM */ -# define b43_suspend NULL -# define b43_resume NULL -#endif /* CONFIG_PM */ - static struct ssb_driver b43_ssb_driver = { .name = KBUILD_MODNAME, .id_table = b43_ssb_tbl, .probe = b43_probe, .remove = b43_remove, - .suspend = b43_suspend, - .resume = b43_resume, }; static void b43_print_driverinfo(void) { const char *feat_pci = "", *feat_pcmcia = "", *feat_nphy = "", - *feat_leds = ""; + *feat_leds = "", *feat_sdio = ""; #ifdef CONFIG_B43_PCI_AUTOSELECT feat_pci = "P"; @@ -5016,11 +5039,14 @@ static void b43_print_driverinfo(void) #ifdef CONFIG_B43_LEDS feat_leds = "L"; #endif +#ifdef CONFIG_B43_SDIO + feat_sdio = "S"; +#endif printk(KERN_INFO "Broadcom 43xx driver loaded " - "[ Features: %s%s%s%s, Firmware-ID: " + "[ Features: %s%s%s%s%s, Firmware-ID: " B43_SUPPORTED_FIRMWARE_ID " ]\n", feat_pci, feat_pcmcia, feat_nphy, - feat_leds); + feat_leds, feat_sdio); } static int __init b43_init(void) @@ -5031,13 +5057,18 @@ static int __init b43_init(void) err = b43_pcmcia_init(); if (err) goto err_dfs_exit; - err = ssb_driver_register(&b43_ssb_driver); + err = b43_sdio_init(); if (err) goto err_pcmcia_exit; + err = ssb_driver_register(&b43_ssb_driver); + if (err) + goto err_sdio_exit; b43_print_driverinfo(); return err; +err_sdio_exit: + b43_sdio_exit(); err_pcmcia_exit: b43_pcmcia_exit(); err_dfs_exit: @@ -5048,6 +5079,7 @@ err_dfs_exit: static void __exit b43_exit(void) { ssb_driver_unregister(&b43_ssb_driver); + b43_sdio_exit(); b43_pcmcia_exit(); b43_debugfs_exit(); } diff --git a/drivers/net/wireless/b43/phy_lp.c b/drivers/net/wireless/b43/phy_lp.c index 3e02d969f68..1e318d815a5 100644 --- a/drivers/net/wireless/b43/phy_lp.c +++ b/drivers/net/wireless/b43/phy_lp.c @@ -2228,6 +2228,16 @@ static enum b43_txpwr_result b43_lpphy_op_recalc_txpower(struct b43_wldev *dev, return B43_TXPWR_RES_DONE; } +void b43_lpphy_op_switch_analog(struct b43_wldev *dev, bool on) +{ + if (on) { + b43_phy_mask(dev, B43_LPPHY_AFE_CTL_OVR, 0xfff8); + } else { + b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVRVAL, 0x0007); + b43_phy_set(dev, B43_LPPHY_AFE_CTL_OVR, 0x0007); + } +} + const struct b43_phy_operations b43_phyops_lp = { .allocate = b43_lpphy_op_allocate, .free = b43_lpphy_op_free, @@ -2239,7 +2249,7 @@ const struct b43_phy_operations b43_phyops_lp = { .radio_read = b43_lpphy_op_radio_read, .radio_write = b43_lpphy_op_radio_write, .software_rfkill = b43_lpphy_op_software_rfkill, - .switch_analog = b43_phyop_switch_analog_generic, + .switch_analog = b43_lpphy_op_switch_analog, .switch_channel = b43_lpphy_op_switch_channel, .get_default_chan = b43_lpphy_op_get_default_chan, .set_rx_antenna = b43_lpphy_op_set_rx_antenna, diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c index 3498b68385e..e96091b3149 100644 --- a/drivers/net/wireless/b43/pio.c +++ b/drivers/net/wireless/b43/pio.c @@ -574,7 +574,7 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev, q->buffer_used -= total_len; q->free_packet_slots += 1; - ieee80211_tx_status_irqsafe(dev->wl->hw, pack->skb); + ieee80211_tx_status(dev->wl->hw, pack->skb); pack->skb = NULL; list_add(&pack->list, &q->packets_list); diff --git a/drivers/net/wireless/b43/rfkill.c b/drivers/net/wireless/b43/rfkill.c index 31e55999893..7a3218c5ba7 100644 --- a/drivers/net/wireless/b43/rfkill.c +++ b/drivers/net/wireless/b43/rfkill.c @@ -28,7 +28,7 @@ /* Returns TRUE, if the radio is enabled in hardware. */ bool b43_is_hw_radio_enabled(struct b43_wldev *dev) { - if (dev->phy.rev >= 3) { + if (dev->phy.rev >= 3 || dev->phy.type == B43_PHYTYPE_LP) { if (!(b43_read32(dev, B43_MMIO_RADIO_HWENABLED_HI) & B43_MMIO_RADIO_HWENABLED_HI_MASK)) return 1; diff --git a/drivers/net/wireless/b43/sdio.c b/drivers/net/wireless/b43/sdio.c new file mode 100644 index 00000000000..0d3ac64147a --- /dev/null +++ b/drivers/net/wireless/b43/sdio.c @@ -0,0 +1,202 @@ +/* + * Broadcom B43 wireless driver + * + * SDIO over Sonics Silicon Backplane bus glue for b43. + * + * Copyright (C) 2009 Albert Herranz + * Copyright (C) 2009 Michael Buesch <mb@bu3sch.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/mmc/card.h> +#include <linux/mmc/sdio_func.h> +#include <linux/mmc/sdio_ids.h> +#include <linux/ssb/ssb.h> + +#include "sdio.h" +#include "b43.h" + + +#define HNBU_CHIPID 0x01 /* vendor & device id */ + +#define B43_SDIO_BLOCK_SIZE 64 /* rx fifo max size in bytes */ + + +static const struct b43_sdio_quirk { + u16 vendor; + u16 device; + unsigned int quirks; +} b43_sdio_quirks[] = { + { 0x14E4, 0x4318, SSB_QUIRK_SDIO_READ_AFTER_WRITE32, }, + { }, +}; + + +static unsigned int b43_sdio_get_quirks(u16 vendor, u16 device) +{ + const struct b43_sdio_quirk *q; + + for (q = b43_sdio_quirks; q->quirks; q++) { + if (vendor == q->vendor && device == q->device) + return q->quirks; + } + + return 0; +} + +static void b43_sdio_interrupt_dispatcher(struct sdio_func *func) +{ + struct b43_sdio *sdio = sdio_get_drvdata(func); + struct b43_wldev *dev = sdio->irq_handler_opaque; + + if (unlikely(b43_status(dev) < B43_STAT_STARTED)) + return; + + sdio_release_host(func); + sdio->irq_handler(dev); + sdio_claim_host(func); +} + +int b43_sdio_request_irq(struct b43_wldev *dev, + void (*handler)(struct b43_wldev *dev)) +{ + struct ssb_bus *bus = dev->dev->bus; + struct sdio_func *func = bus->host_sdio; + struct b43_sdio *sdio = sdio_get_drvdata(func); + int err; + + sdio->irq_handler_opaque = dev; + sdio->irq_handler = handler; + sdio_claim_host(func); + err = sdio_claim_irq(func, b43_sdio_interrupt_dispatcher); + sdio_release_host(func); + + return err; +} + +void b43_sdio_free_irq(struct b43_wldev *dev) +{ + struct ssb_bus *bus = dev->dev->bus; + struct sdio_func *func = bus->host_sdio; + struct b43_sdio *sdio = sdio_get_drvdata(func); + + sdio_claim_host(func); + sdio_release_irq(func); + sdio_release_host(func); + sdio->irq_handler_opaque = NULL; + sdio->irq_handler = NULL; +} + +static int b43_sdio_probe(struct sdio_func *func, + const struct sdio_device_id *id) +{ + struct b43_sdio *sdio; + struct sdio_func_tuple *tuple; + u16 vendor = 0, device = 0; + int error; + + /* Look for the card chip identifier. */ + tuple = func->tuples; + while (tuple) { + switch (tuple->code) { + case 0x80: + switch (tuple->data[0]) { + case HNBU_CHIPID: + if (tuple->size != 5) + break; + vendor = tuple->data[1] | (tuple->data[2]<<8); + device = tuple->data[3] | (tuple->data[4]<<8); + dev_info(&func->dev, "Chip ID %04x:%04x\n", + vendor, device); + break; + default: + break; + } + break; + default: + break; + } + tuple = tuple->next; + } + if (!vendor || !device) { + error = -ENODEV; + goto out; + } + + sdio_claim_host(func); + error = sdio_set_block_size(func, B43_SDIO_BLOCK_SIZE); + if (error) { + dev_err(&func->dev, "failed to set block size to %u bytes," + " error %d\n", B43_SDIO_BLOCK_SIZE, error); + goto err_release_host; + } + error = sdio_enable_func(func); + if (error) { + dev_err(&func->dev, "failed to enable func, error %d\n", error); + goto err_release_host; + } + sdio_release_host(func); + + sdio = kzalloc(sizeof(*sdio), GFP_KERNEL); + if (!sdio) { + error = -ENOMEM; + dev_err(&func->dev, "failed to allocate ssb bus\n"); + goto err_disable_func; + } + error = ssb_bus_sdiobus_register(&sdio->ssb, func, + b43_sdio_get_quirks(vendor, device)); + if (error) { + dev_err(&func->dev, "failed to register ssb sdio bus," + " error %d\n", error); + goto err_free_ssb; + } + sdio_set_drvdata(func, sdio); + + return 0; + +err_free_ssb: + kfree(sdio); +err_disable_func: + sdio_disable_func(func); +err_release_host: + sdio_release_host(func); +out: + return error; +} + +static void b43_sdio_remove(struct sdio_func *func) +{ + struct b43_sdio *sdio = sdio_get_drvdata(func); + + ssb_bus_unregister(&sdio->ssb); + sdio_disable_func(func); + kfree(sdio); + sdio_set_drvdata(func, NULL); +} + +static const struct sdio_device_id b43_sdio_ids[] = { + { SDIO_DEVICE(0x02d0, 0x044b) }, /* Nintendo Wii WLAN daughter card */ + { }, +}; + +static struct sdio_driver b43_sdio_driver = { + .name = "b43-sdio", + .id_table = b43_sdio_ids, + .probe = b43_sdio_probe, + .remove = b43_sdio_remove, +}; + +int b43_sdio_init(void) +{ + return sdio_register_driver(&b43_sdio_driver); +} + +void b43_sdio_exit(void) +{ + sdio_unregister_driver(&b43_sdio_driver); +} diff --git a/drivers/net/wireless/b43/sdio.h b/drivers/net/wireless/b43/sdio.h new file mode 100644 index 00000000000..fb633094403 --- /dev/null +++ b/drivers/net/wireless/b43/sdio.h @@ -0,0 +1,45 @@ +#ifndef B43_SDIO_H_ +#define B43_SDIO_H_ + +#include <linux/ssb/ssb.h> + +struct b43_wldev; + + +#ifdef CONFIG_B43_SDIO + +struct b43_sdio { + struct ssb_bus ssb; + void *irq_handler_opaque; + void (*irq_handler)(struct b43_wldev *dev); +}; + +int b43_sdio_request_irq(struct b43_wldev *dev, + void (*handler)(struct b43_wldev *dev)); +void b43_sdio_free_irq(struct b43_wldev *dev); + +int b43_sdio_init(void); +void b43_sdio_exit(void); + + +#else /* CONFIG_B43_SDIO */ + + +int b43_sdio_request_irq(struct b43_wldev *dev, + void (*handler)(struct b43_wldev *dev)) +{ + return -ENODEV; +} +void b43_sdio_free_irq(struct b43_wldev *dev) +{ +} +static inline int b43_sdio_init(void) +{ + return 0; +} +static inline void b43_sdio_exit(void) +{ +} + +#endif /* CONFIG_B43_SDIO */ +#endif /* B43_SDIO_H_ */ diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 14f541248b5..ac9f600995e 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -690,8 +690,11 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) } memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); - ieee80211_rx_irqsafe(dev->wl->hw, skb); + ieee80211_rx(dev->wl->hw, skb); +#if B43_DEBUG + dev->rx_count++; +#endif return; drop: b43dbg(dev->wl, "RX: Packet dropped\n"); diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index ca61d3796ce..3259b884154 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -2021,6 +2021,12 @@ static int iwl4965_tx_status_reply_tx(struct iwl_priv *priv, agg->frame_count, txq_id, idx); hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx); + if (!hdr) { + IWL_ERR(priv, + "BUG_ON idx doesn't point to valid skb" + " idx=%d, txq_id=%d\n", idx, txq_id); + return -1; + } sc = le16_to_cpu(hdr->seq_ctrl); if (idx != (SEQ_TO_SN(sc) & 0xff)) { diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index 1d539e3b8db..a6391c7fea5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -1163,6 +1163,12 @@ static int iwl5000_tx_status_reply_tx(struct iwl_priv *priv, agg->frame_count, txq_id, idx); hdr = iwl_tx_queue_get_hdr(priv, txq_id, idx); + if (!hdr) { + IWL_ERR(priv, + "BUG_ON idx doesn't point to valid skb" + " idx=%d, txq_id=%d\n", idx, txq_id); + return -1; + } sc = le16_to_cpu(hdr->seq_ctrl); if (idx != (SEQ_TO_SN(sc) & 0xff)) { diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c index b90adcb73b0..8e1bb53c0aa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rx.c +++ b/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -250,12 +250,20 @@ void iwl_rx_allocate(struct iwl_priv *priv, gfp_t priority) } spin_unlock_irqrestore(&rxq->lock, flags); + if (rxq->free_count > RX_LOW_WATERMARK) + priority |= __GFP_NOWARN; /* Alloc a new receive buffer */ skb = alloc_skb(priv->hw_params.rx_buf_size + 256, priority); if (!skb) { - IWL_CRIT(priv, "Can not allocate SKB buffers\n"); + if (net_ratelimit()) + IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n"); + if ((rxq->free_count <= RX_LOW_WATERMARK) && + net_ratelimit()) + IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n", + priority == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL", + rxq->free_count); /* We don't reschedule replenish work here -- we will * call the restock method and if it still needs * more buffers it will schedule replenish */ diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index a2b9ec82b96..c6633fec821 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -520,7 +520,7 @@ int iwl_send_static_wepkey_cmd(struct iwl_priv *priv, u8 send_if_empty) struct iwl_host_cmd cmd = { .id = REPLY_WEPKEY, .data = wep_cmd, - .flags = CMD_SYNC, + .flags = CMD_ASYNC, }; memset(wep_cmd, 0, cmd_size + diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 090966837f3..4f2d4393728 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -1146,11 +1146,18 @@ static void iwl3945_rx_allocate(struct iwl_priv *priv, gfp_t priority) } spin_unlock_irqrestore(&rxq->lock, flags); + if (rxq->free_count > RX_LOW_WATERMARK) + priority |= __GFP_NOWARN; /* Alloc a new receive buffer */ skb = alloc_skb(priv->hw_params.rx_buf_size, priority); if (!skb) { if (net_ratelimit()) - IWL_CRIT(priv, ": Can not allocate SKB buffers\n"); + IWL_DEBUG_INFO(priv, "Failed to allocate SKB buffer.\n"); + if ((rxq->free_count <= RX_LOW_WATERMARK) && + net_ratelimit()) + IWL_CRIT(priv, "Failed to allocate SKB buffer with %s. Only %u free buffers remaining.\n", + priority == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL", + rxq->free_count); /* We don't reschedule replenish work here -- we will * call the restock method and if it still needs * more buffers it will schedule replenish */ diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h index 5462cb5ad99..567f029a8cd 100644 --- a/drivers/net/wireless/rt2x00/rt2x00lib.h +++ b/drivers/net/wireless/rt2x00/rt2x00lib.h @@ -380,7 +380,7 @@ static inline void rt2x00crypto_tx_insert_iv(struct sk_buff *skb, { } -static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, bool l2pad, +static inline void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, unsigned int header_length, struct rxdone_entry_desc *rxdesc) { diff --git a/drivers/net/wireless/wl12xx/Kconfig b/drivers/net/wireless/wl12xx/Kconfig index 7b14d5bc63d..88060e11754 100644 --- a/drivers/net/wireless/wl12xx/Kconfig +++ b/drivers/net/wireless/wl12xx/Kconfig @@ -1,5 +1,5 @@ menuconfig WL12XX - boolean "TI wl12xx driver support" + tristate "TI wl12xx driver support" depends on MAC80211 && WLAN_80211 && EXPERIMENTAL ---help--- This will enable TI wl12xx driver support. The drivers make diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 38688847d56..23a6a6d4863 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -1070,7 +1070,7 @@ static int eject_installer(struct usb_interface *intf) /* Find bulk out endpoint */ endpoint = &iface_desc->endpoint[1].desc; - if ((endpoint->bEndpointAddress & USB_TYPE_MASK) == USB_DIR_OUT && + if (usb_endpoint_dir_out(endpoint) && usb_endpoint_xfer_bulk(endpoint)) { bulk_out_ep = endpoint->bEndpointAddress; } else { diff --git a/drivers/net/xilinx_emaclite.c b/drivers/net/xilinx_emaclite.c index dc22782633a..83a044dbd1d 100644 --- a/drivers/net/xilinx_emaclite.c +++ b/drivers/net/xilinx_emaclite.c @@ -134,18 +134,15 @@ static void xemaclite_enable_interrupts(struct net_local *drvdata) } /* Enable the Rx interrupts for the first buffer */ - reg_data = in_be32(drvdata->base_addr + XEL_RSR_OFFSET); out_be32(drvdata->base_addr + XEL_RSR_OFFSET, - reg_data | XEL_RSR_RECV_IE_MASK); + XEL_RSR_RECV_IE_MASK); /* Enable the Rx interrupts for the second Buffer if * configured in HW */ if (drvdata->rx_ping_pong != 0) { - reg_data = in_be32(drvdata->base_addr + XEL_BUFFER_OFFSET + - XEL_RSR_OFFSET); out_be32(drvdata->base_addr + XEL_BUFFER_OFFSET + XEL_RSR_OFFSET, - reg_data | XEL_RSR_RECV_IE_MASK); + XEL_RSR_RECV_IE_MASK); } /* Enable the Global Interrupt Enable */ diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 8574622e36a..c9e2ae90f19 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -154,9 +154,8 @@ int sync_start(void) { int err; - if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL)) + if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) return -ENOMEM; - cpumask_clear(marked_cpus); start_cpu_work(); diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c index 554e11f9e1c..8eefe56f1cb 100644 --- a/drivers/parport/procfs.c +++ b/drivers/parport/procfs.c @@ -31,7 +31,7 @@ #define PARPORT_MIN_SPINTIME_VALUE 1 #define PARPORT_MAX_SPINTIME_VALUE 1000 -static int do_active_device(ctl_table *table, int write, struct file *filp, +static int do_active_device(ctl_table *table, int write, void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp, } #ifdef CONFIG_PARPORT_1284 -static int do_autoprobe(ctl_table *table, int write, struct file *filp, +static int do_autoprobe(ctl_table *table, int write, void __user *result, size_t *lenp, loff_t *ppos) { struct parport_device_info *info = table->extra2; @@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp, #endif /* IEEE1284.3 support. */ static int do_hardware_base_addr (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write, } static int do_hardware_irq (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write, } static int do_hardware_dma (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; @@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write, } static int do_hardware_modes (ctl_table *table, int write, - struct file *filp, void __user *result, + void __user *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 36faa9a8e18..3070f77eb56 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -72,15 +72,9 @@ do { \ #define SLOT_NAME_SIZE 10 struct slot { - u8 bus; - u8 device; u8 state; - u8 hp_slot; - u32 number; struct controller *ctrl; - struct hpc_ops *hpc_ops; struct hotplug_slot *hotplug_slot; - struct list_head slot_list; struct delayed_work work; /* work for button event */ struct mutex lock; }; @@ -92,18 +86,10 @@ struct event_info { }; struct controller { - struct mutex crit_sect; /* critical section mutex */ struct mutex ctrl_lock; /* controller lock */ - int num_slots; /* Number of slots on ctlr */ - int slot_num_inc; /* 1 or -1 */ - struct pci_dev *pci_dev; struct pcie_device *pcie; /* PCI Express port service */ - struct list_head slot_list; - struct hpc_ops *hpc_ops; + struct slot *slot; wait_queue_head_t queue; /* sleep & wake process */ - u8 slot_device_offset; - u32 first_slot; /* First physical slot number */ /* PCIE only has 1 slot */ - u8 slot_bus; /* Bus where the slots handled by this controller sit */ u32 slot_cap; u8 cap_base; struct timer_list poll_timer; @@ -131,40 +117,20 @@ struct controller { #define POWERON_STATE 3 #define POWEROFF_STATE 4 -/* Error messages */ -#define INTERLOCK_OPEN 0x00000002 -#define ADD_NOT_SUPPORTED 0x00000003 -#define CARD_FUNCTIONING 0x00000005 -#define ADAPTER_NOT_SAME 0x00000006 -#define NO_ADAPTER_PRESENT 0x00000009 -#define NOT_ENOUGH_RESOURCES 0x0000000B -#define DEVICE_TYPE_NOT_SUPPORTED 0x0000000C -#define WRONG_BUS_FREQUENCY 0x0000000D -#define POWER_FAILURE 0x0000000E - -/* Field definitions in Slot Capabilities Register */ -#define ATTN_BUTTN_PRSN 0x00000001 -#define PWR_CTRL_PRSN 0x00000002 -#define MRL_SENS_PRSN 0x00000004 -#define ATTN_LED_PRSN 0x00000008 -#define PWR_LED_PRSN 0x00000010 -#define HP_SUPR_RM_SUP 0x00000020 -#define EMI_PRSN 0x00020000 -#define NO_CMD_CMPL_SUP 0x00040000 - -#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & ATTN_BUTTN_PRSN) -#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PWR_CTRL_PRSN) -#define MRL_SENS(ctrl) ((ctrl)->slot_cap & MRL_SENS_PRSN) -#define ATTN_LED(ctrl) ((ctrl)->slot_cap & ATTN_LED_PRSN) -#define PWR_LED(ctrl) ((ctrl)->slot_cap & PWR_LED_PRSN) -#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & HP_SUPR_RM_SUP) -#define EMI(ctrl) ((ctrl)->slot_cap & EMI_PRSN) -#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & NO_CMD_CMPL_SUP) +#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP) +#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP) +#define MRL_SENS(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP) +#define ATTN_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP) +#define PWR_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP) +#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS) +#define EMI(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP) +#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS) +#define PSN(ctrl) ((ctrl)->slot_cap >> 19) extern int pciehp_sysfs_enable_slot(struct slot *slot); extern int pciehp_sysfs_disable_slot(struct slot *slot); extern u8 pciehp_handle_attention_button(struct slot *p_slot); - extern u8 pciehp_handle_switch_change(struct slot *p_slot); +extern u8 pciehp_handle_switch_change(struct slot *p_slot); extern u8 pciehp_handle_presence_change(struct slot *p_slot); extern u8 pciehp_handle_power_fault(struct slot *p_slot); extern int pciehp_configure_device(struct slot *p_slot); @@ -175,45 +141,30 @@ int pcie_init_notification(struct controller *ctrl); int pciehp_enable_slot(struct slot *p_slot); int pciehp_disable_slot(struct slot *p_slot); int pcie_enable_notification(struct controller *ctrl); +int pciehp_power_on_slot(struct slot *slot); +int pciehp_power_off_slot(struct slot *slot); +int pciehp_get_power_status(struct slot *slot, u8 *status); +int pciehp_get_attention_status(struct slot *slot, u8 *status); + +int pciehp_set_attention_status(struct slot *slot, u8 status); +int pciehp_get_latch_status(struct slot *slot, u8 *status); +int pciehp_get_adapter_status(struct slot *slot, u8 *status); +int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *speed); +int pciehp_get_max_link_width(struct slot *slot, enum pcie_link_width *val); +int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *speed); +int pciehp_get_cur_link_width(struct slot *slot, enum pcie_link_width *val); +int pciehp_query_power_fault(struct slot *slot); +void pciehp_green_led_on(struct slot *slot); +void pciehp_green_led_off(struct slot *slot); +void pciehp_green_led_blink(struct slot *slot); +int pciehp_check_link_status(struct controller *ctrl); +void pciehp_release_ctrl(struct controller *ctrl); static inline const char *slot_name(struct slot *slot) { return hotplug_slot_name(slot->hotplug_slot); } -static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device) -{ - struct slot *slot; - - list_for_each_entry(slot, &ctrl->slot_list, slot_list) { - if (slot->device == device) - return slot; - } - - ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device); - return NULL; -} - -struct hpc_ops { - int (*power_on_slot)(struct slot *slot); - int (*power_off_slot)(struct slot *slot); - int (*get_power_status)(struct slot *slot, u8 *status); - int (*get_attention_status)(struct slot *slot, u8 *status); - int (*set_attention_status)(struct slot *slot, u8 status); - int (*get_latch_status)(struct slot *slot, u8 *status); - int (*get_adapter_status)(struct slot *slot, u8 *status); - int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed); - int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed); - int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val); - int (*get_cur_lnk_width)(struct slot *slot, enum pcie_link_width *val); - int (*query_power_fault)(struct slot *slot); - void (*green_led_on)(struct slot *slot); - void (*green_led_off)(struct slot *slot); - void (*green_led_blink)(struct slot *slot); - void (*release_ctlr)(struct controller *ctrl); - int (*check_lnk_status)(struct controller *ctrl); -}; - #ifdef CONFIG_ACPI #include <acpi/acpi.h> #include <acpi/acpi_bus.h> diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 7163e6a6cfa..37c8d3d0323 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -33,6 +33,11 @@ #define PCIEHP_DETECT_AUTO (2) #define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_AUTO +struct dummy_slot { + u32 number; + struct list_head list; +}; + static int slot_detection_mode; static char *pciehp_detect_mode; module_param(pciehp_detect_mode, charp, 0444); @@ -77,7 +82,7 @@ static int __init dummy_probe(struct pcie_device *dev) int pos; u32 slot_cap; acpi_handle handle; - struct slot *slot, *tmp; + struct dummy_slot *slot, *tmp; struct pci_dev *pdev = dev->port; /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ if (pciehp_get_hp_hw_control_from_firmware(pdev)) @@ -89,11 +94,11 @@ static int __init dummy_probe(struct pcie_device *dev) if (!slot) return -ENOMEM; slot->number = slot_cap >> 19; - list_for_each_entry(tmp, &dummy_slots, slot_list) { + list_for_each_entry(tmp, &dummy_slots, list) { if (tmp->number == slot->number) dup_slot_id++; } - list_add_tail(&slot->slot_list, &dummy_slots); + list_add_tail(&slot->list, &dummy_slots); handle = DEVICE_ACPI_HANDLE(&pdev->dev); if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle)) acpi_slot_detected = 1; @@ -109,11 +114,11 @@ static struct pcie_port_service_driver __initdata dummy_driver = { static int __init select_detection_mode(void) { - struct slot *slot, *tmp; + struct dummy_slot *slot, *tmp; pcie_port_service_register(&dummy_driver); pcie_port_service_unregister(&dummy_driver); - list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) { - list_del(&slot->slot_list); + list_for_each_entry_safe(slot, tmp, &dummy_slots, list) { + list_del(&slot->list); kfree(slot); } if (acpi_slot_detected && dup_slot_id) diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 2317557fdee..bc234719b1d 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -99,65 +99,55 @@ static void release_slot(struct hotplug_slot *hotplug_slot) kfree(hotplug_slot); } -static int init_slots(struct controller *ctrl) +static int init_slot(struct controller *ctrl) { - struct slot *slot; - struct hotplug_slot *hotplug_slot; - struct hotplug_slot_info *info; + struct slot *slot = ctrl->slot; + struct hotplug_slot *hotplug = NULL; + struct hotplug_slot_info *info = NULL; char name[SLOT_NAME_SIZE]; int retval = -ENOMEM; - list_for_each_entry(slot, &ctrl->slot_list, slot_list) { - hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL); - if (!hotplug_slot) - goto error; - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto error_hpslot; - - /* register this slot with the hotplug pci core */ - hotplug_slot->info = info; - hotplug_slot->private = slot; - hotplug_slot->release = &release_slot; - hotplug_slot->ops = &pciehp_hotplug_slot_ops; - slot->hotplug_slot = hotplug_slot; - snprintf(name, SLOT_NAME_SIZE, "%u", slot->number); - - ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x " - "hp_slot=%x sun=%x slot_device_offset=%x\n", - pci_domain_nr(ctrl->pci_dev->subordinate), - slot->bus, slot->device, slot->hp_slot, slot->number, - ctrl->slot_device_offset); - retval = pci_hp_register(hotplug_slot, - ctrl->pci_dev->subordinate, - slot->device, - name); - if (retval) { - ctrl_err(ctrl, "pci_hp_register failed with error %d\n", - retval); - goto error_info; - } - get_power_status(hotplug_slot, &info->power_status); - get_attention_status(hotplug_slot, &info->attention_status); - get_latch_status(hotplug_slot, &info->latch_status); - get_adapter_status(hotplug_slot, &info->adapter_status); + hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL); + if (!hotplug) + goto out; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto out; + + /* register this slot with the hotplug pci core */ + hotplug->info = info; + hotplug->private = slot; + hotplug->release = &release_slot; + hotplug->ops = &pciehp_hotplug_slot_ops; + slot->hotplug_slot = hotplug; + snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl)); + + ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:00 sun=%x\n", + pci_domain_nr(ctrl->pcie->port->subordinate), + ctrl->pcie->port->subordinate->number, PSN(ctrl)); + retval = pci_hp_register(hotplug, + ctrl->pcie->port->subordinate, 0, name); + if (retval) { + ctrl_err(ctrl, + "pci_hp_register failed with error %d\n", retval); + goto out; + } + get_power_status(hotplug, &info->power_status); + get_attention_status(hotplug, &info->attention_status); + get_latch_status(hotplug, &info->latch_status); + get_adapter_status(hotplug, &info->adapter_status); +out: + if (retval) { + kfree(info); + kfree(hotplug); } - - return 0; -error_info: - kfree(info); -error_hpslot: - kfree(hotplug_slot); -error: return retval; } -static void cleanup_slots(struct controller *ctrl) +static void cleanup_slot(struct controller *ctrl) { - struct slot *slot; - list_for_each_entry(slot, &ctrl->slot_list, slot_list) - pci_hp_deregister(slot->hotplug_slot); + pci_hp_deregister(ctrl->slot->hotplug_slot); } /* @@ -173,7 +163,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status) hotplug_slot->info->attention_status = status; if (ATTN_LED(slot->ctrl)) - slot->hpc_ops->set_attention_status(slot, status); + pciehp_set_attention_status(slot, status); return 0; } @@ -208,7 +198,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_power_status(slot, value); + retval = pciehp_get_power_status(slot, value); if (retval < 0) *value = hotplug_slot->info->power_status; @@ -223,7 +213,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_attention_status(slot, value); + retval = pciehp_get_attention_status(slot, value); if (retval < 0) *value = hotplug_slot->info->attention_status; @@ -238,7 +228,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_latch_status(slot, value); + retval = pciehp_get_latch_status(slot, value); if (retval < 0) *value = hotplug_slot->info->latch_status; @@ -253,7 +243,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value) ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_adapter_status(slot, value); + retval = pciehp_get_adapter_status(slot, value); if (retval < 0) *value = hotplug_slot->info->adapter_status; @@ -269,7 +259,7 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot, ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_max_bus_speed(slot, value); + retval = pciehp_get_max_link_speed(slot, value); if (retval < 0) *value = PCI_SPEED_UNKNOWN; @@ -284,7 +274,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n", __func__, slot_name(slot)); - retval = slot->hpc_ops->get_cur_bus_speed(slot, value); + retval = pciehp_get_cur_link_speed(slot, value); if (retval < 0) *value = PCI_SPEED_UNKNOWN; @@ -295,7 +285,7 @@ static int pciehp_probe(struct pcie_device *dev) { int rc; struct controller *ctrl; - struct slot *t_slot; + struct slot *slot; u8 value; struct pci_dev *pdev = dev->port; @@ -314,7 +304,7 @@ static int pciehp_probe(struct pcie_device *dev) set_service_data(dev, ctrl); /* Setup the slot information structures */ - rc = init_slots(ctrl); + rc = init_slot(ctrl); if (rc) { if (rc == -EBUSY) ctrl_warn(ctrl, "Slot already registered by another " @@ -332,15 +322,15 @@ static int pciehp_probe(struct pcie_device *dev) } /* Check if slot is occupied */ - t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); - t_slot->hpc_ops->get_adapter_status(t_slot, &value); + slot = ctrl->slot; + pciehp_get_adapter_status(slot, &value); if (value) { if (pciehp_force) - pciehp_enable_slot(t_slot); + pciehp_enable_slot(slot); } else { /* Power off slot if not occupied */ if (POWER_CTRL(ctrl)) { - rc = t_slot->hpc_ops->power_off_slot(t_slot); + rc = pciehp_power_off_slot(slot); if (rc) goto err_out_free_ctrl_slot; } @@ -349,19 +339,19 @@ static int pciehp_probe(struct pcie_device *dev) return 0; err_out_free_ctrl_slot: - cleanup_slots(ctrl); + cleanup_slot(ctrl); err_out_release_ctlr: - ctrl->hpc_ops->release_ctlr(ctrl); + pciehp_release_ctrl(ctrl); err_out_none: return -ENODEV; } -static void pciehp_remove (struct pcie_device *dev) +static void pciehp_remove(struct pcie_device *dev) { struct controller *ctrl = get_service_data(dev); - cleanup_slots(ctrl); - ctrl->hpc_ops->release_ctlr(ctrl); + cleanup_slot(ctrl); + pciehp_release_ctrl(ctrl); } #ifdef CONFIG_PM @@ -376,20 +366,20 @@ static int pciehp_resume (struct pcie_device *dev) dev_info(&dev->device, "%s ENTRY\n", __func__); if (pciehp_force) { struct controller *ctrl = get_service_data(dev); - struct slot *t_slot; + struct slot *slot; u8 status; /* reinitialize the chipset's event detection logic */ pcie_enable_notification(ctrl); - t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); + slot = ctrl->slot; /* Check if slot is occupied */ - t_slot->hpc_ops->get_adapter_status(t_slot, &status); + pciehp_get_adapter_status(slot, &status); if (status) - pciehp_enable_slot(t_slot); + pciehp_enable_slot(slot); else - pciehp_disable_slot(t_slot); + pciehp_disable_slot(slot); } return 0; } diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index b97cb4c3e0f..84487d126e4 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -82,7 +82,7 @@ u8 pciehp_handle_switch_change(struct slot *p_slot) /* Switch Change */ ctrl_dbg(ctrl, "Switch interrupt received\n"); - p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + pciehp_get_latch_status(p_slot, &getstatus); if (getstatus) { /* * Switch opened @@ -114,7 +114,7 @@ u8 pciehp_handle_presence_change(struct slot *p_slot) /* Switch is open, assume a presence change * Save the presence state */ - p_slot->hpc_ops->get_adapter_status(p_slot, &presence_save); + pciehp_get_adapter_status(p_slot, &presence_save); if (presence_save) { /* * Card Present @@ -143,7 +143,7 @@ u8 pciehp_handle_power_fault(struct slot *p_slot) /* power fault */ ctrl_dbg(ctrl, "Power fault interrupt received\n"); - if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) { + if (!pciehp_query_power_fault(p_slot)) { /* * power fault Cleared */ @@ -172,7 +172,7 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot) { /* turn off slot, turn on Amber LED, turn off Green LED if supported*/ if (POWER_CTRL(ctrl)) { - if (pslot->hpc_ops->power_off_slot(pslot)) { + if (pciehp_power_off_slot(pslot)) { ctrl_err(ctrl, "Issue of Slot Power Off command failed\n"); return; @@ -186,10 +186,10 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot) } if (PWR_LED(ctrl)) - pslot->hpc_ops->green_led_off(pslot); + pciehp_green_led_off(pslot); if (ATTN_LED(ctrl)) { - if (pslot->hpc_ops->set_attention_status(pslot, 1)) { + if (pciehp_set_attention_status(pslot, 1)) { ctrl_err(ctrl, "Issue of Set Attention Led command failed\n"); return; @@ -208,24 +208,20 @@ static int board_added(struct slot *p_slot) { int retval = 0; struct controller *ctrl = p_slot->ctrl; - struct pci_bus *parent = ctrl->pci_dev->subordinate; - - ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d, %d\n", - __func__, p_slot->device, ctrl->slot_device_offset, - p_slot->hp_slot); + struct pci_bus *parent = ctrl->pcie->port->subordinate; if (POWER_CTRL(ctrl)) { /* Power on slot */ - retval = p_slot->hpc_ops->power_on_slot(p_slot); + retval = pciehp_power_on_slot(p_slot); if (retval) return retval; } if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_blink(p_slot); + pciehp_green_led_blink(p_slot); /* Check link training status */ - retval = p_slot->hpc_ops->check_lnk_status(ctrl); + retval = pciehp_check_link_status(ctrl); if (retval) { ctrl_err(ctrl, "Failed to check link status\n"); set_slot_off(ctrl, p_slot); @@ -233,21 +229,21 @@ static int board_added(struct slot *p_slot) } /* Check for a power fault */ - if (p_slot->hpc_ops->query_power_fault(p_slot)) { + if (pciehp_query_power_fault(p_slot)) { ctrl_dbg(ctrl, "Power fault detected\n"); - retval = POWER_FAILURE; + retval = -EIO; goto err_exit; } retval = pciehp_configure_device(p_slot); if (retval) { - ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n", - pci_domain_nr(parent), p_slot->bus, p_slot->device); + ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n", + pci_domain_nr(parent), parent->number); goto err_exit; } if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_on(p_slot); + pciehp_green_led_on(p_slot); return 0; @@ -269,11 +265,9 @@ static int remove_board(struct slot *p_slot) if (retval) return retval; - ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, p_slot->hp_slot); - if (POWER_CTRL(ctrl)) { /* power off slot */ - retval = p_slot->hpc_ops->power_off_slot(p_slot); + retval = pciehp_power_off_slot(p_slot); if (retval) { ctrl_err(ctrl, "Issue of Slot Disable command failed\n"); @@ -287,9 +281,9 @@ static int remove_board(struct slot *p_slot) msleep(1000); } + /* turn off Green LED */ if (PWR_LED(ctrl)) - /* turn off Green LED */ - p_slot->hpc_ops->green_led_off(p_slot); + pciehp_green_led_off(p_slot); return 0; } @@ -317,18 +311,17 @@ static void pciehp_power_thread(struct work_struct *work) case POWEROFF_STATE: mutex_unlock(&p_slot->lock); ctrl_dbg(p_slot->ctrl, - "Disabling domain:bus:device=%04x:%02x:%02x\n", - pci_domain_nr(p_slot->ctrl->pci_dev->subordinate), - p_slot->bus, p_slot->device); + "Disabling domain:bus:device=%04x:%02x:00\n", + pci_domain_nr(p_slot->ctrl->pcie->port->subordinate), + p_slot->ctrl->pcie->port->subordinate->number); pciehp_disable_slot(p_slot); mutex_lock(&p_slot->lock); p_slot->state = STATIC_STATE; break; case POWERON_STATE: mutex_unlock(&p_slot->lock); - if (pciehp_enable_slot(p_slot) && - PWR_LED(p_slot->ctrl)) - p_slot->hpc_ops->green_led_off(p_slot); + if (pciehp_enable_slot(p_slot) && PWR_LED(p_slot->ctrl)) + pciehp_green_led_off(p_slot); mutex_lock(&p_slot->lock); p_slot->state = STATIC_STATE; break; @@ -379,10 +372,10 @@ static int update_slot_info(struct slot *slot) if (!info) return -ENOMEM; - slot->hpc_ops->get_power_status(slot, &(info->power_status)); - slot->hpc_ops->get_attention_status(slot, &(info->attention_status)); - slot->hpc_ops->get_latch_status(slot, &(info->latch_status)); - slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status)); + pciehp_get_power_status(slot, &info->power_status); + pciehp_get_attention_status(slot, &info->attention_status); + pciehp_get_latch_status(slot, &info->latch_status); + pciehp_get_adapter_status(slot, &info->adapter_status); result = pci_hp_change_slot_info(slot->hotplug_slot, info); kfree (info); @@ -399,7 +392,7 @@ static void handle_button_press_event(struct slot *p_slot) switch (p_slot->state) { case STATIC_STATE: - p_slot->hpc_ops->get_power_status(p_slot, &getstatus); + pciehp_get_power_status(p_slot, &getstatus); if (getstatus) { p_slot->state = BLINKINGOFF_STATE; ctrl_info(ctrl, @@ -413,9 +406,9 @@ static void handle_button_press_event(struct slot *p_slot) } /* blink green LED and turn off amber */ if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_blink(p_slot); + pciehp_green_led_blink(p_slot); if (ATTN_LED(ctrl)) - p_slot->hpc_ops->set_attention_status(p_slot, 0); + pciehp_set_attention_status(p_slot, 0); schedule_delayed_work(&p_slot->work, 5*HZ); break; @@ -430,13 +423,13 @@ static void handle_button_press_event(struct slot *p_slot) cancel_delayed_work(&p_slot->work); if (p_slot->state == BLINKINGOFF_STATE) { if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_on(p_slot); + pciehp_green_led_on(p_slot); } else { if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_off(p_slot); + pciehp_green_led_off(p_slot); } if (ATTN_LED(ctrl)) - p_slot->hpc_ops->set_attention_status(p_slot, 0); + pciehp_set_attention_status(p_slot, 0); ctrl_info(ctrl, "PCI slot #%s - action canceled " "due to button press\n", slot_name(p_slot)); p_slot->state = STATIC_STATE; @@ -474,7 +467,7 @@ static void handle_surprise_event(struct slot *p_slot) info->p_slot = p_slot; INIT_WORK(&info->work, pciehp_power_thread); - p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); + pciehp_get_adapter_status(p_slot, &getstatus); if (!getstatus) p_slot->state = POWEROFF_STATE; else @@ -498,9 +491,9 @@ static void interrupt_event_handler(struct work_struct *work) if (!POWER_CTRL(ctrl)) break; if (ATTN_LED(ctrl)) - p_slot->hpc_ops->set_attention_status(p_slot, 1); + pciehp_set_attention_status(p_slot, 1); if (PWR_LED(ctrl)) - p_slot->hpc_ops->green_led_off(p_slot); + pciehp_green_led_off(p_slot); break; case INT_PRESENCE_ON: case INT_PRESENCE_OFF: @@ -525,45 +518,38 @@ int pciehp_enable_slot(struct slot *p_slot) int rc; struct controller *ctrl = p_slot->ctrl; - /* Check to see if (latch closed, card present, power off) */ - mutex_lock(&p_slot->ctrl->crit_sect); - - rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); + rc = pciehp_get_adapter_status(p_slot, &getstatus); if (rc || !getstatus) { ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } if (MRL_SENS(p_slot->ctrl)) { - rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + rc = pciehp_get_latch_status(p_slot, &getstatus); if (rc || getstatus) { ctrl_info(ctrl, "Latch open on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } } if (POWER_CTRL(p_slot->ctrl)) { - rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); + rc = pciehp_get_power_status(p_slot, &getstatus); if (rc || getstatus) { ctrl_info(ctrl, "Already enabled on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -EINVAL; } } - p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + pciehp_get_latch_status(p_slot, &getstatus); rc = board_added(p_slot); if (rc) { - p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + pciehp_get_latch_status(p_slot, &getstatus); } update_slot_info(p_slot); - mutex_unlock(&p_slot->ctrl->crit_sect); return rc; } @@ -577,35 +563,29 @@ int pciehp_disable_slot(struct slot *p_slot) if (!p_slot->ctrl) return 1; - /* Check to see if (latch closed, card present, power on) */ - mutex_lock(&p_slot->ctrl->crit_sect); - if (!HP_SUPR_RM(p_slot->ctrl)) { - ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus); + ret = pciehp_get_adapter_status(p_slot, &getstatus); if (ret || !getstatus) { ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } } if (MRL_SENS(p_slot->ctrl)) { - ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); + ret = pciehp_get_latch_status(p_slot, &getstatus); if (ret || getstatus) { ctrl_info(ctrl, "Latch open on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -ENODEV; } } if (POWER_CTRL(p_slot->ctrl)) { - ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus); + ret = pciehp_get_power_status(p_slot, &getstatus); if (ret || !getstatus) { ctrl_info(ctrl, "Already disabled on slot(%s)\n", slot_name(p_slot)); - mutex_unlock(&p_slot->ctrl->crit_sect); return -EINVAL; } } @@ -613,7 +593,6 @@ int pciehp_disable_slot(struct slot *p_slot) ret = remove_board(p_slot); update_slot_info(p_slot); - mutex_unlock(&p_slot->ctrl->crit_sect); return ret; } diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 271f917b6f2..9ef4605c1ef 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -44,25 +44,25 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0); static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_read_config_word(dev, ctrl->cap_base + reg, value); } static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_read_config_dword(dev, ctrl->cap_base + reg, value); } static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_write_config_word(dev, ctrl->cap_base + reg, value); } static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; return pci_write_config_dword(dev, ctrl->cap_base + reg, value); } @@ -266,7 +266,7 @@ static void pcie_wait_link_active(struct controller *ctrl) ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n"); } -static int hpc_check_lnk_status(struct controller *ctrl) +int pciehp_check_link_status(struct controller *ctrl) { u16 lnk_status; int retval = 0; @@ -305,7 +305,7 @@ static int hpc_check_lnk_status(struct controller *ctrl) return retval; } -static int hpc_get_attention_status(struct slot *slot, u8 *status) +int pciehp_get_attention_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_ctrl; @@ -344,7 +344,7 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status) return 0; } -static int hpc_get_power_status(struct slot *slot, u8 *status) +int pciehp_get_power_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_ctrl; @@ -376,7 +376,7 @@ static int hpc_get_power_status(struct slot *slot, u8 *status) return retval; } -static int hpc_get_latch_status(struct slot *slot, u8 *status) +int pciehp_get_latch_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_status; @@ -392,7 +392,7 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status) return 0; } -static int hpc_get_adapter_status(struct slot *slot, u8 *status) +int pciehp_get_adapter_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_status; @@ -408,7 +408,7 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status) return 0; } -static int hpc_query_power_fault(struct slot *slot) +int pciehp_query_power_fault(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_status; @@ -422,7 +422,7 @@ static int hpc_query_power_fault(struct slot *slot) return !!(slot_status & PCI_EXP_SLTSTA_PFD); } -static int hpc_set_attention_status(struct slot *slot, u8 value) +int pciehp_set_attention_status(struct slot *slot, u8 value) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -450,7 +450,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value) return rc; } -static void hpc_set_green_led_on(struct slot *slot) +void pciehp_green_led_on(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -463,7 +463,7 @@ static void hpc_set_green_led_on(struct slot *slot) __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } -static void hpc_set_green_led_off(struct slot *slot) +void pciehp_green_led_off(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -476,7 +476,7 @@ static void hpc_set_green_led_off(struct slot *slot) __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } -static void hpc_set_green_led_blink(struct slot *slot) +void pciehp_green_led_blink(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -489,7 +489,7 @@ static void hpc_set_green_led_blink(struct slot *slot) __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } -static int hpc_power_on_slot(struct slot * slot) +int pciehp_power_on_slot(struct slot * slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -497,8 +497,6 @@ static int hpc_power_on_slot(struct slot * slot) u16 slot_status; int retval = 0; - ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot); - /* Clear sticky power-fault bit from previous power failures */ retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); if (retval) { @@ -539,7 +537,7 @@ static int hpc_power_on_slot(struct slot * slot) static inline int pcie_mask_bad_dllp(struct controller *ctrl) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; int pos; u32 reg; @@ -556,7 +554,7 @@ static inline int pcie_mask_bad_dllp(struct controller *ctrl) static inline void pcie_unmask_bad_dllp(struct controller *ctrl) { - struct pci_dev *dev = ctrl->pci_dev; + struct pci_dev *dev = ctrl->pcie->port; u32 reg; int pos; @@ -570,7 +568,7 @@ static inline void pcie_unmask_bad_dllp(struct controller *ctrl) pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg); } -static int hpc_power_off_slot(struct slot * slot) +int pciehp_power_off_slot(struct slot * slot) { struct controller *ctrl = slot->ctrl; u16 slot_cmd; @@ -578,8 +576,6 @@ static int hpc_power_off_slot(struct slot * slot) int retval = 0; int changed; - ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot); - /* * Set Bad DLLP Mask bit in Correctable Error Mask * Register. This is the workaround against Bad DLLP error @@ -614,8 +610,8 @@ static int hpc_power_off_slot(struct slot * slot) static irqreturn_t pcie_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; + struct slot *slot = ctrl->slot; u16 detected, intr_loc; - struct slot *p_slot; /* * In order to guarantee that all interrupt events are @@ -656,29 +652,27 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) if (!(intr_loc & ~PCI_EXP_SLTSTA_CC)) return IRQ_HANDLED; - p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); - /* Check MRL Sensor Changed */ if (intr_loc & PCI_EXP_SLTSTA_MRLSC) - pciehp_handle_switch_change(p_slot); + pciehp_handle_switch_change(slot); /* Check Attention Button Pressed */ if (intr_loc & PCI_EXP_SLTSTA_ABP) - pciehp_handle_attention_button(p_slot); + pciehp_handle_attention_button(slot); /* Check Presence Detect Changed */ if (intr_loc & PCI_EXP_SLTSTA_PDC) - pciehp_handle_presence_change(p_slot); + pciehp_handle_presence_change(slot); /* Check Power Fault Detected */ if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { ctrl->power_fault_detected = 1; - pciehp_handle_power_fault(p_slot); + pciehp_handle_power_fault(slot); } return IRQ_HANDLED; } -static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value) +int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value) { struct controller *ctrl = slot->ctrl; enum pcie_link_speed lnk_speed; @@ -709,7 +703,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value) return retval; } -static int hpc_get_max_lnk_width(struct slot *slot, +int pciehp_get_max_lnk_width(struct slot *slot, enum pcie_link_width *value) { struct controller *ctrl = slot->ctrl; @@ -759,7 +753,7 @@ static int hpc_get_max_lnk_width(struct slot *slot, return retval; } -static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value) +int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value) { struct controller *ctrl = slot->ctrl; enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN; @@ -791,7 +785,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value) return retval; } -static int hpc_get_cur_lnk_width(struct slot *slot, +int pciehp_get_cur_lnk_width(struct slot *slot, enum pcie_link_width *value) { struct controller *ctrl = slot->ctrl; @@ -842,30 +836,6 @@ static int hpc_get_cur_lnk_width(struct slot *slot, return retval; } -static void pcie_release_ctrl(struct controller *ctrl); -static struct hpc_ops pciehp_hpc_ops = { - .power_on_slot = hpc_power_on_slot, - .power_off_slot = hpc_power_off_slot, - .set_attention_status = hpc_set_attention_status, - .get_power_status = hpc_get_power_status, - .get_attention_status = hpc_get_attention_status, - .get_latch_status = hpc_get_latch_status, - .get_adapter_status = hpc_get_adapter_status, - - .get_max_bus_speed = hpc_get_max_lnk_speed, - .get_cur_bus_speed = hpc_get_cur_lnk_speed, - .get_max_lnk_width = hpc_get_max_lnk_width, - .get_cur_lnk_width = hpc_get_cur_lnk_width, - - .query_power_fault = hpc_query_power_fault, - .green_led_on = hpc_set_green_led_on, - .green_led_off = hpc_set_green_led_off, - .green_led_blink = hpc_set_green_led_blink, - - .release_ctlr = pcie_release_ctrl, - .check_lnk_status = hpc_check_lnk_status, -}; - int pcie_enable_notification(struct controller *ctrl) { u16 cmd, mask; @@ -930,23 +900,16 @@ static int pcie_init_slot(struct controller *ctrl) if (!slot) return -ENOMEM; - slot->hp_slot = 0; slot->ctrl = ctrl; - slot->bus = ctrl->pci_dev->subordinate->number; - slot->device = ctrl->slot_device_offset + slot->hp_slot; - slot->hpc_ops = ctrl->hpc_ops; - slot->number = ctrl->first_slot; mutex_init(&slot->lock); INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work); - list_add(&slot->slot_list, &ctrl->slot_list); + ctrl->slot = slot; return 0; } static void pcie_cleanup_slot(struct controller *ctrl) { - struct slot *slot; - slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list); - list_del(&slot->slot_list); + struct slot *slot = ctrl->slot; cancel_delayed_work(&slot->work); flush_scheduled_work(); flush_workqueue(pciehp_wq); @@ -957,7 +920,7 @@ static inline void dbg_ctrl(struct controller *ctrl) { int i; u16 reg16; - struct pci_dev *pdev = ctrl->pci_dev; + struct pci_dev *pdev = ctrl->pcie->port; if (!pciehp_debug) return; @@ -980,7 +943,7 @@ static inline void dbg_ctrl(struct controller *ctrl) (unsigned long long)pci_resource_start(pdev, i)); } ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap); - ctrl_info(ctrl, " Physical Slot Number : %d\n", ctrl->first_slot); + ctrl_info(ctrl, " Physical Slot Number : %d\n", PSN(ctrl)); ctrl_info(ctrl, " Attention Button : %3s\n", ATTN_BUTTN(ctrl) ? "yes" : "no"); ctrl_info(ctrl, " Power Controller : %3s\n", @@ -1014,10 +977,7 @@ struct controller *pcie_init(struct pcie_device *dev) dev_err(&dev->device, "%s: Out of memory\n", __func__); goto abort; } - INIT_LIST_HEAD(&ctrl->slot_list); - ctrl->pcie = dev; - ctrl->pci_dev = pdev; ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP); if (!ctrl->cap_base) { ctrl_err(ctrl, "Cannot find PCI Express capability\n"); @@ -1029,11 +989,6 @@ struct controller *pcie_init(struct pcie_device *dev) } ctrl->slot_cap = slot_cap; - ctrl->first_slot = slot_cap >> 19; - ctrl->slot_device_offset = 0; - ctrl->num_slots = 1; - ctrl->hpc_ops = &pciehp_hpc_ops; - mutex_init(&ctrl->crit_sect); mutex_init(&ctrl->ctrl_lock); init_waitqueue_head(&ctrl->queue); dbg_ctrl(ctrl); @@ -1089,7 +1044,7 @@ abort: return NULL; } -void pcie_release_ctrl(struct controller *ctrl) +void pciehp_release_ctrl(struct controller *ctrl) { pcie_shutdown_notification(ctrl); pcie_cleanup_slot(ctrl); diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c index 02e24d63b3e..21733108add 100644 --- a/drivers/pci/hotplug/pciehp_pci.c +++ b/drivers/pci/hotplug/pciehp_pci.c @@ -63,27 +63,27 @@ static int __ref pciehp_add_bridge(struct pci_dev *dev) int pciehp_configure_device(struct slot *p_slot) { struct pci_dev *dev; - struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; + struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate; int num, fn; struct controller *ctrl = p_slot->ctrl; - dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0)); + dev = pci_get_slot(parent, PCI_DEVFN(0, 0)); if (dev) { ctrl_err(ctrl, "Device %s already exists " - "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev), - pci_domain_nr(parent), p_slot->bus, p_slot->device); + "at %04x:%02x:00, cannot hot-add\n", pci_name(dev), + pci_domain_nr(parent), parent->number); pci_dev_put(dev); return -EINVAL; } - num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0)); + num = pci_scan_slot(parent, PCI_DEVFN(0, 0)); if (num == 0) { ctrl_err(ctrl, "No new device found\n"); return -ENODEV; } for (fn = 0; fn < 8; fn++) { - dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn)); + dev = pci_get_slot(parent, PCI_DEVFN(0, fn)); if (!dev) continue; if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { @@ -111,19 +111,18 @@ int pciehp_unconfigure_device(struct slot *p_slot) int j; u8 bctl = 0; u8 presence = 0; - struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate; + struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate; u16 command; struct controller *ctrl = p_slot->ctrl; - ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n", - __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device); - ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence); + ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n", + __func__, pci_domain_nr(parent), parent->number); + ret = pciehp_get_adapter_status(p_slot, &presence); if (ret) presence = 0; for (j = 0; j < 8; j++) { - struct pci_dev* temp = pci_get_slot(parent, - (p_slot->device << 3) | j); + struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j)); if (!temp) continue; if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) { diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 10c0e62bd5a..2ce8f9ccc66 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -318,6 +318,8 @@ static int __init aer_service_init(void) { if (pcie_aer_disable) return -ENXIO; + if (!pci_msi_enabled()) + return -ENXIO; return pcie_port_service_register(&aerdriver); } diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index f289ca9bf18..745402e8e49 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -303,9 +303,6 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev, pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, ®32); info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10; - /* 00b and 10b are defined as "Reserved". */ - if (info->support == PCIE_LINK_STATE_L1) - info->support = 0; info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12; info->latency_encoding_l1 = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15; pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, ®16); diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index fb45f5ee8df..454970d2d70 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -746,7 +746,9 @@ static acpi_status WMID_set_u32(u32 value, u32 cap, struct wmi_interface *iface) return AE_BAD_PARAMETER; if (quirks->mailled == 1) { param = value ? 0x92 : 0x93; + i8042_lock_chip(); i8042_command(¶m, 0x1059); + i8042_unlock_chip(); return 0; } break; diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index c431198bdbc..82daa3c1dc9 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -14,7 +14,6 @@ #include <linux/init.h> #include <linux/miscdevice.h> -#include <linux/utsname.h> #include <linux/debugfs.h> #include <asm/ipl.h> #include <asm/sclp.h> diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 82b34893e5b..9a4dd5992f6 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -117,8 +117,6 @@ source "drivers/staging/vt6655/Kconfig" source "drivers/staging/vt6656/Kconfig" -source "drivers/staging/cpc-usb/Kconfig" - source "drivers/staging/udlfb/Kconfig" source "drivers/staging/hv/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index b1cad0d9ba7..104f2f8897e 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -40,7 +40,6 @@ obj-$(CONFIG_USB_SERIAL_QUATECH_USB2) += quatech_usb2/ obj-$(CONFIG_OCTEON_ETHERNET) += octeon/ obj-$(CONFIG_VT6655) += vt6655/ obj-$(CONFIG_VT6656) += vt6656/ -obj-$(CONFIG_USB_CPC) += cpc-usb/ obj-$(CONFIG_FB_UDL) += udlfb/ obj-$(CONFIG_HYPERV) += hv/ obj-$(CONFIG_VME_BUS) += vme/ diff --git a/drivers/staging/cpc-usb/Kconfig b/drivers/staging/cpc-usb/Kconfig deleted file mode 100644 index 2be0bc9c39d..00000000000 --- a/drivers/staging/cpc-usb/Kconfig +++ /dev/null @@ -1,4 +0,0 @@ -config USB_CPC - tristate "CPC CAN USB driver" - depends on USB && PROC_FS - default n diff --git a/drivers/staging/cpc-usb/Makefile b/drivers/staging/cpc-usb/Makefile deleted file mode 100644 index 3f83170a8fa..00000000000 --- a/drivers/staging/cpc-usb/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_USB_CPC) += cpc-usb.o - -cpc-usb-y := cpc-usb_drv.o sja2m16c_2.o diff --git a/drivers/staging/cpc-usb/TODO b/drivers/staging/cpc-usb/TODO deleted file mode 100644 index 9b1752fb9cd..00000000000 --- a/drivers/staging/cpc-usb/TODO +++ /dev/null @@ -1,10 +0,0 @@ -Things to do for this driver to get merged into the main portion of the -kernel: - - checkpatch cleanups - - sparse clean - - remove proc code - - tie into CAN socket interfaces if possible - - figure out sane userspace api - - use linux's error codes - -Send patches to Greg Kroah-Hartman <greg@kroah.com> diff --git a/drivers/staging/cpc-usb/cpc-usb_drv.c b/drivers/staging/cpc-usb/cpc-usb_drv.c deleted file mode 100644 index c5eca46996f..00000000000 --- a/drivers/staging/cpc-usb/cpc-usb_drv.c +++ /dev/null @@ -1,1184 +0,0 @@ -/* - * CPC-USB CAN Interface Kernel Driver - * - * Copyright (C) 2004-2009 EMS Dr. Thomas Wuensche - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published - * by the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/module.h> -#include <linux/poll.h> -#include <linux/smp_lock.h> -#include <linux/completion.h> -#include <asm/uaccess.h> -#include <linux/usb.h> - - -#include <linux/proc_fs.h> - -#include "cpc.h" - -#include "cpc_int.h" -#include "cpcusb.h" - -#include "sja2m16c.h" - -/* Version Information */ -#define DRIVER_AUTHOR "Sebastian Haas <haas@ems-wuensche.com>" -#define DRIVER_DESC "CPC-USB Driver for Linux Kernel 2.6" -#define DRIVER_VERSION CPC_DRIVER_VERSION - -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL v2"); - -/* Define these values to match your devices */ -#define USB_CPCUSB_VENDOR_ID 0x12D6 - -#define USB_CPCUSB_M16C_PRODUCT_ID 0x0888 -#define USB_CPCUSB_LPC2119_PRODUCT_ID 0x0444 - -#define CPC_USB_PROC_DIR CPC_PROC_DIR "cpc-usb" - -static struct proc_dir_entry *procDir; -static struct proc_dir_entry *procEntry; - -/* Module parameters */ -static int debug; -module_param(debug, int, S_IRUGO); - -/* table of devices that work with this driver */ -static struct usb_device_id cpcusb_table[] = { - {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_M16C_PRODUCT_ID)}, - {USB_DEVICE(USB_CPCUSB_VENDOR_ID, USB_CPCUSB_LPC2119_PRODUCT_ID)}, - {} /* Terminating entry */ -}; - -MODULE_DEVICE_TABLE(usb, cpcusb_table); - -/* use to prevent kernel panic if driver is unloaded - * while a programm has still open the device - */ -DECLARE_WAIT_QUEUE_HEAD(rmmodWq); -atomic_t useCount; - -static CPC_USB_T *CPCUSB_Table[CPC_USB_CARD_CNT] = { 0 }; -static unsigned int CPCUsbCnt; - -/* prevent races between open() and disconnect() */ -static DECLARE_MUTEX(disconnect_sem); - -/* local function prototypes */ -static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count, - loff_t *ppos); -static ssize_t cpcusb_write(struct file *file, const char *buffer, - size_t count, loff_t *ppos); -static unsigned int cpcusb_poll(struct file *file, poll_table * wait); -static int cpcusb_open(struct inode *inode, struct file *file); -static int cpcusb_release(struct inode *inode, struct file *file); - -static int cpcusb_probe(struct usb_interface *interface, - const struct usb_device_id *id); -static void cpcusb_disconnect(struct usb_interface *interface); - -static void cpcusb_read_bulk_callback(struct urb *urb); -static void cpcusb_write_bulk_callback(struct urb *urb); -static void cpcusb_read_interrupt_callback(struct urb *urb); - -static int cpcusb_setup_intrep(CPC_USB_T *card); - -static struct file_operations cpcusb_fops = { - /* - * The owner field is part of the module-locking - * mechanism. The idea is that the kernel knows - * which module to increment the use-counter of - * BEFORE it calls the device's open() function. - * This also means that the kernel can decrement - * the use-counter again before calling release() - * or should the open() function fail. - */ - .owner = THIS_MODULE, - - .read = cpcusb_read, - .write = cpcusb_write, - .poll = cpcusb_poll, - .open = cpcusb_open, - .release = cpcusb_release, -}; - -/* - * usb class driver info in order to get a minor number from the usb core, - * and to have the device registered with devfs and the driver core - */ -static struct usb_class_driver cpcusb_class = { - .name = "usb/cpc_usb%d", - .fops = &cpcusb_fops, - .minor_base = CPC_USB_BASE_MNR, -}; - -/* usb specific object needed to register this driver with the usb subsystem */ -static struct usb_driver cpcusb_driver = { - .name = "cpc-usb", - .probe = cpcusb_probe, - .disconnect = cpcusb_disconnect, - .id_table = cpcusb_table, -}; - -static int cpcusb_create_info_output(char *buf) -{ - int i = 0, j; - - for (j = 0; j < CPC_USB_CARD_CNT; j++) { - if (CPCUSB_Table[j]) { - CPC_USB_T *card = CPCUSB_Table[j]; - CPC_CHAN_T *chan = card->chan; - - /* MINOR CHANNELNO BUSNO SLOTNO */ - i += sprintf(&buf[i], "%d %s\n", chan->minor, - card->serialNumber); - } - } - - return i; -} - -static int cpcusb_proc_read_info(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = cpcusb_create_info_output(page); - - if (len <= off + count) - *eof = 1; - *start = page + off; - len -= off; - if (len > count) - len = count; - if (len < 0) - len = 0; - - return len; -} - -/* - * Remove CPC-USB and cleanup - */ -static inline void cpcusb_delete(CPC_USB_T *card) -{ - if (card) { - if (card->chan) { - if (card->chan->buf) - vfree(card->chan->buf); - - if (card->chan->CPCWait_q) - kfree(card->chan->CPCWait_q); - - kfree(card->chan); - } - - CPCUSB_Table[card->idx] = NULL; - kfree(card); - } -} - -/* - * setup the interrupt IN endpoint of a specific CPC-USB device - */ -static int cpcusb_setup_intrep(CPC_USB_T *card) -{ - int retval = 0; - struct usb_endpoint_descriptor *ep; - - ep = &card->interface->altsetting[0].endpoint[card->num_intr_in].desc; - - card->intr_in_buffer[0] = 0; - card->free_slots = 15; /* initial size */ - - /* setup the urb */ - usb_fill_int_urb(card->intr_in_urb, card->udev, - usb_rcvintpipe(card->udev, card->num_intr_in), - card->intr_in_buffer, - sizeof(card->intr_in_buffer), - cpcusb_read_interrupt_callback, - card, - ep->bInterval); - - card->intr_in_urb->status = 0; /* needed! */ - - /* submit the urb */ - retval = usb_submit_urb(card->intr_in_urb, GFP_KERNEL); - - if (retval) - err("%s - failed submitting intr urb, error %d", __func__, - retval); - - return retval; -} - -static int cpcusb_open(struct inode *inode, struct file *file) -{ - CPC_USB_T *card = NULL; - struct usb_interface *interface; - int subminor; - int j, retval = 0; - - subminor = iminor(inode); - - /* prevent disconnects */ - down(&disconnect_sem); - - interface = usb_find_interface(&cpcusb_driver, subminor); - if (!interface) { - err("%s - error, can't find device for minor %d", - __func__, subminor); - retval = CPC_ERR_NO_INTERFACE_PRESENT; - goto exit_no_device; - } - - card = usb_get_intfdata(interface); - if (!card) { - retval = CPC_ERR_NO_INTERFACE_PRESENT; - goto exit_no_device; - } - - /* lock this device */ - down(&card->sem); - - /* increment our usage count for the driver */ - if (card->open) { - dbg("device already opened"); - retval = CPC_ERR_CHANNEL_ALREADY_OPEN; - goto exit_on_error; - } - - /* save our object in the file's private structure */ - file->private_data = card; - for (j = 0; j < CPC_USB_URB_CNT; j++) { - usb_fill_bulk_urb(card->urbs[j].urb, card->udev, - usb_rcvbulkpipe(card->udev, card->num_bulk_in), - card->urbs[j].buffer, card->urbs[j].size, - cpcusb_read_bulk_callback, card); - - retval = usb_submit_urb(card->urbs[j].urb, GFP_KERNEL); - - if (retval) { - err("%s - failed submitting read urb, error %d", - __func__, retval); - retval = CPC_ERR_TRANSMISSION_FAILED; - goto exit_on_error; - } - } - - info("%s - %d URB's submitted", __func__, j); - - ResetBuffer(card->chan); - - cpcusb_setup_intrep(card); - card->open = 1; - - atomic_inc(&useCount); - -exit_on_error: - /* unlock this device */ - up(&card->sem); - -exit_no_device: - up(&disconnect_sem); - - return retval; -} - -static unsigned int cpcusb_poll(struct file *file, poll_table * wait) -{ - CPC_USB_T *card = (CPC_USB_T *) file->private_data; - unsigned int retval = 0; - - if (!card) { - err("%s - device object lost", __func__); - return -EIO; - } - - poll_wait(file, card->chan->CPCWait_q, wait); - - if (IsBufferNotEmpty(card->chan) || !(card->present)) - retval |= (POLLIN | POLLRDNORM); - - if (card->free_slots) - retval |= (POLLOUT | POLLWRNORM); - - return retval; -} - -static int cpcusb_release(struct inode *inode, struct file *file) -{ - CPC_USB_T *card = (CPC_USB_T *) file->private_data; - int j, retval = 0; - - if (card == NULL) { - dbg("%s - object is NULL", __func__); - return CPC_ERR_NO_INTERFACE_PRESENT; - } - - /* lock our device */ - down(&card->sem); - - if (!card->open) { - dbg("%s - device not opened", __func__); - retval = CPC_ERR_NO_INTERFACE_PRESENT; - goto exit_not_opened; - } - - /* if device wasn't unplugged kill all urbs */ - if (card->present) { - /* kill read urbs */ - for (j = 0; j < CPC_USB_URB_CNT; j++) { - usb_kill_urb(card->urbs[j].urb); - } - - /* kill irq urb */ - usb_kill_urb(card->intr_in_urb); - - /* kill write urbs */ - for (j = 0; j < CPC_USB_URB_CNT; j++) { - if (atomic_read(&card->wrUrbs[j].busy)) { - usb_kill_urb(card->wrUrbs[j].urb); - wait_for_completion(&card->wrUrbs[j].finished); - } - } - } - - atomic_dec(&useCount); - - /* last process detached */ - if (atomic_read(&useCount) == 0) { - wake_up(&rmmodWq); - } - - if (!card->present && card->open) { - /* the device was unplugged before the file was released */ - up(&card->sem); - cpcusb_delete(card); - return 0; - } - - card->open = 0; - -exit_not_opened: - up(&card->sem); - - return 0; -} - -static ssize_t cpcusb_read(struct file *file, char *buffer, size_t count, - loff_t *ppos) -{ - CPC_USB_T *card = (CPC_USB_T *) file->private_data; - CPC_CHAN_T *chan; - int retval = 0; - - if (count < sizeof(CPC_MSG_T)) - return CPC_ERR_UNKNOWN; - - /* check if can read from the given address */ - if (!access_ok(VERIFY_WRITE, buffer, count)) - return CPC_ERR_UNKNOWN; - - /* lock this object */ - down(&card->sem); - - /* verify that the device wasn't unplugged */ - if (!card->present) { - up(&card->sem); - return CPC_ERR_NO_INTERFACE_PRESENT; - } - - if (IsBufferEmpty(card->chan)) { - retval = 0; - } else { - chan = card->chan; - -#if 0 - /* convert LPC2119 params back to SJA1000 params */ - if (card->deviceRevision >= 0x0200 - && chan->buf[chan->oidx].type == CPC_MSG_T_CAN_PRMS) { - LPC2119_TO_SJA1000_Params(&chan->buf[chan->oidx]); - } -#endif - - if (copy_to_user(buffer, &chan->buf[chan->oidx], count) != 0) { - retval = CPC_ERR_IO_TRANSFER; - } else { - chan->oidx = (chan->oidx + 1) % CPC_MSG_BUF_CNT; - chan->WnR = 1; - retval = sizeof(CPC_MSG_T); - } - } -/* spin_unlock_irqrestore(&card->slock, flags); */ - - /* unlock the device */ - up(&card->sem); - - return retval; -} - -#define SHIFT 1 -static inline void cpcusb_align_buffer_alignment(unsigned char *buf) -{ - /* CPC-USB uploads packed bytes. */ - CPC_MSG_T *cpc = (CPC_MSG_T *) buf; - unsigned int i; - - for (i = 0; i < cpc->length + (2 * sizeof(unsigned long)); i++) { - ((unsigned char *) &cpc->msgid)[1 + i] = - ((unsigned char *) &cpc->msgid)[1 + SHIFT + i]; - } -} - -static int cpc_get_buffer_count(CPC_CHAN_T *chan) -{ - /* check the buffer parameters */ - if (chan->iidx == chan->oidx) - return !chan->WnR ? CPC_MSG_BUF_CNT : 0; - else if (chan->iidx >= chan->oidx) - return (chan->iidx - chan->oidx) % CPC_MSG_BUF_CNT; - - return (chan->iidx + CPC_MSG_BUF_CNT - chan->oidx) % CPC_MSG_BUF_CNT; -} - -static ssize_t cpcusb_write(struct file *file, const char *buffer, - size_t count, loff_t *ppos) -{ - CPC_USB_T *card = (CPC_USB_T *) file->private_data; - CPC_USB_WRITE_URB_T *wrUrb = NULL; - - ssize_t bytes_written = 0; - int retval = 0; - int j; - - unsigned char *obuf = NULL; - unsigned char type = 0; - CPC_MSG_T *info = NULL; - - dbg("%s - entered minor %d, count = %zu, present = %d", - __func__, card->minor, count, card->present); - - if (count > sizeof(CPC_MSG_T)) - return CPC_ERR_UNKNOWN; - - /* check if can read from the given address */ - if (!access_ok(VERIFY_READ, buffer, count)) - return CPC_ERR_UNKNOWN; - - /* lock this object */ - down(&card->sem); - - /* verify that the device wasn't unplugged */ - if (!card->present) { - retval = CPC_ERR_NO_INTERFACE_PRESENT; - goto exit; - } - - /* verify that we actually have some data to write */ - if (count == 0) { - dbg("%s - write request of 0 bytes", __func__); - goto exit; - } - - if (card->free_slots <= 5) { - info = (CPC_MSG_T *) buffer; - - if (info->type != CPC_CMD_T_CLEAR_CMD_QUEUE - || card->free_slots <= 0) { - dbg("%s - send buffer full please try again %d", - __func__, card->free_slots); - retval = CPC_ERR_CAN_NO_TRANSMIT_BUF; - goto exit; - } - } - - /* Find a free write urb */ - for (j = 0; j < CPC_USB_URB_CNT; j++) { - if (!atomic_read(&card->wrUrbs[j].busy)) { - wrUrb = &card->wrUrbs[j]; /* remember found URB */ - atomic_set(&wrUrb->busy, 1); /* lock this URB */ - init_completion(&wrUrb->finished); /* init completion */ - dbg("WR URB no. %d started", j); - break; - } - } - - /* don't found write urb say error */ - if (!wrUrb) { - dbg("%s - no free send urb available", __func__); - retval = CPC_ERR_CAN_NO_TRANSMIT_BUF; - goto exit; - } - dbg("URB write req"); - - obuf = (unsigned char *) wrUrb->urb->transfer_buffer; - - /* copy the data from userspace into our transfer buffer; - * this is the only copy required. - */ - if (copy_from_user(&obuf[4], buffer, count) != 0) { - atomic_set(&wrUrb->busy, 0); /* release urb */ - retval = CPC_ERR_IO_TRANSFER; - goto exit; - } - - /* check if it is a DRIVER information message, so we can - * response to that message and not the USB - */ - info = (CPC_MSG_T *) &obuf[4]; - - bytes_written = 11 + info->length; - if (bytes_written >= wrUrb->size) { - retval = CPC_ERR_IO_TRANSFER; - goto exit; - } - - switch (info->type) { - case CPC_CMD_T_CLEAR_MSG_QUEUE: - ResetBuffer(card->chan); - break; - - case CPC_CMD_T_INQ_MSG_QUEUE_CNT: - retval = cpc_get_buffer_count(card->chan); - atomic_set(&wrUrb->busy, 0); - - goto exit; - - case CPC_CMD_T_INQ_INFO: - if (info->msg.info.source == CPC_INFOMSG_T_DRIVER) { - /* release urb cause we'll use it for driver - * information - */ - atomic_set(&wrUrb->busy, 0); - if (IsBufferFull(card->chan)) { - retval = CPC_ERR_IO_TRANSFER; - goto exit; - } - - /* it is a driver information request message and we have - * free rx slots to store the response - */ - type = info->msg.info.type; - info = &card->chan->buf[card->chan->iidx]; - - info->type = CPC_MSG_T_INFO; - info->msg.info.source = CPC_INFOMSG_T_DRIVER; - info->msg.info.type = type; - - switch (type) { - case CPC_INFOMSG_T_VERSION: - info->length = strlen(CPC_DRIVER_VERSION) + 2; - sprintf(info->msg.info.msg, "%s\n", - CPC_DRIVER_VERSION); - break; - - case CPC_INFOMSG_T_SERIAL: - info->length = strlen(CPC_DRIVER_SERIAL) + 2; - sprintf(info->msg.info.msg, "%s\n", - CPC_DRIVER_SERIAL); - break; - - default: - info->length = 2; - info->msg.info.type = - CPC_INFOMSG_T_UNKNOWN_TYPE; - } - - card->chan->WnR = 0; - card->chan->iidx = - (card->chan->iidx + 1) % CPC_MSG_BUF_CNT; - - retval = info->length; - goto exit; - } - break; - case CPC_CMD_T_CAN_PRMS: - /* Check the controller type. If it's the new CPC-USB, make sure if these are SJA1000 params */ - if (info->msg.canparams.cc_type != SJA1000 - && info->msg.canparams.cc_type != M16C_BASIC - && (card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID - && info->msg.canparams.cc_type != SJA1000)) { - /* don't forget to release the urb */ - atomic_set(&wrUrb->busy, 0); - retval = CPC_ERR_WRONG_CONTROLLER_TYPE; - goto exit; - } - break; - } - - /* just convert the params if it is an old CPC-USB with M16C controller */ - if (card->productId == USB_CPCUSB_M16C_PRODUCT_ID) { - /* if it is a parameter message convert it from SJA1000 controller - * settings to M16C Basic controller settings - */ - SJA1000_TO_M16C_BASIC_Params((CPC_MSG_T *) &obuf[4]); - } - - /* don't forget the byte alignment */ - cpcusb_align_buffer_alignment(&obuf[4]); - - /* setup a the 4 byte header */ - obuf[0] = obuf[1] = obuf[2] = obuf[3] = 0; - - /* this urb was already set up, except for this write size */ - wrUrb->urb->transfer_buffer_length = bytes_written + 4; - - /* send the data out the bulk port */ - /* a character device write uses GFP_KERNEL, - unless a spinlock is held */ - retval = usb_submit_urb(wrUrb->urb, GFP_KERNEL); - if (retval) { - atomic_set(&wrUrb->busy, 0); /* release urb */ - err("%s - failed submitting write urb, error %d", - __func__, retval); - } else { - retval = bytes_written; - } - -exit: - /* unlock the device */ - up(&card->sem); - - dbg("%s - leaved", __func__); - - return retval; -} - -/* - * callback for interrupt IN urb - */ -static void cpcusb_read_interrupt_callback(struct urb *urb) -{ - CPC_USB_T *card = (CPC_USB_T *) urb->context; - int retval; - unsigned long flags; - - spin_lock_irqsave(&card->slock, flags); - - if (!card->present) { - spin_unlock_irqrestore(&card->slock, flags); - info("%s - no such device", __func__); - return; - } - - switch (urb->status) { - case 0: /* success */ - card->free_slots = card->intr_in_buffer[1]; - break; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - /* urb was killed */ - spin_unlock_irqrestore(&card->slock, flags); - dbg("%s - intr urb killed", __func__); - return; - default: - info("%s - nonzero urb status %d", __func__, urb->status); - break; - } - - retval = usb_submit_urb(urb, GFP_ATOMIC); - if (retval) { - err("%s - failed resubmitting intr urb, error %d", - __func__, retval); - } - - spin_unlock_irqrestore(&card->slock, flags); - wake_up_interruptible(card->chan->CPCWait_q); - - return; -} - -#define UN_SHIFT 1 -#define CPCMSG_HEADER_LEN_FIRMWARE 11 -static inline int cpcusb_unalign_and_copy_buffy(unsigned char *out, - unsigned char *in) -{ - unsigned int i, j; - - for (i = 0; i < 3; i++) - out[i] = in[i]; - - for (j = 0; j < (in[1] + (CPCMSG_HEADER_LEN_FIRMWARE - 3)); j++) - out[j + i + UN_SHIFT] = in[j + i]; - - return i + j; -} - -/* - * callback for bulk IN urb - */ -static void cpcusb_read_bulk_callback(struct urb *urb) -{ - CPC_USB_T *card = (CPC_USB_T *) urb->context; - CPC_CHAN_T *chan; - unsigned char *ibuf = urb->transfer_buffer; - int retval, msgCnt, start, again = 0; - unsigned long flags; - - if (!card) { - err("%s - device object lost", __func__); - return; - } - - spin_lock_irqsave(&card->slock, flags); - - if (!card->present) { - spin_unlock_irqrestore(&card->slock, flags); - info("%s - no such device", __func__); - return; - } - - switch (urb->status) { - case 0: /* success */ - break; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - /* urb was killed */ - spin_unlock_irqrestore(&card->slock, flags); - dbg("%s - read urb killed", __func__); - return; - default: - info("%s - nonzero urb status %d", __func__, urb->status); - break; - } - - if (urb->actual_length) { - msgCnt = ibuf[0] & ~0x80; - again = ibuf[0] & 0x80; - - /* we have a 4 byte header */ - start = 4; - chan = card->chan; - while (msgCnt) { - if (!(IsBufferFull(card->chan))) { - start += - cpcusb_unalign_and_copy_buffy((unsigned char *) - &chan->buf[chan->iidx], &ibuf[start]); - - if (start > urb->transfer_buffer_length) { - err("%d > %d", start, urb->transfer_buffer_length); - break; - } - - chan->WnR = 0; - chan->iidx = (chan->iidx + 1) % CPC_MSG_BUF_CNT; - msgCnt--; - } else { - break; - } - } - } - - usb_fill_bulk_urb(urb, card->udev, - usb_rcvbulkpipe(card->udev, card->num_bulk_in), - urb->transfer_buffer, - urb->transfer_buffer_length, - cpcusb_read_bulk_callback, card); - - retval = usb_submit_urb(urb, GFP_ATOMIC); - - if (retval) { - err("%s - failed resubmitting read urb, error %d", __func__, retval); - } - - spin_unlock_irqrestore(&card->slock, flags); - - wake_up_interruptible(card->chan->CPCWait_q); -} - -/* - * callback for bulk IN urb - */ -static void cpcusb_write_bulk_callback(struct urb *urb) -{ - CPC_USB_T *card = (CPC_USB_T *) urb->context; - unsigned long flags; - int j; - - spin_lock_irqsave(&card->slock, flags); - - /* find this urb */ - for (j = 0; j < CPC_USB_URB_CNT; j++) { - if (card->wrUrbs[j].urb == urb) { - dbg("URB found no. %d", j); - /* notify anyone waiting that the write has finished */ - complete(&card->wrUrbs[j].finished); - atomic_set(&card->wrUrbs[j].busy, 0); - break; - } - } - - switch (urb->status) { - case 0: /* success */ - break; - case -ECONNRESET: - case -ENOENT: - case -ESHUTDOWN: - /* urb was killed */ - spin_unlock_irqrestore(&card->slock, flags); - dbg("%s - write urb no. %d killed", __func__, j); - return; - default: - info("%s - nonzero urb status %d", __func__, urb->status); - break; - } - - spin_unlock_irqrestore(&card->slock, flags); - - wake_up_interruptible(card->chan->CPCWait_q); -} - -static inline int cpcusb_get_free_slot(void) -{ - int i; - - for (i = 0; i < CPC_USB_CARD_CNT; i++) { - if (!CPCUSB_Table[i]) - return i; - } - - return -1; -} - -/* - * probe function for new CPC-USB devices - */ -static int cpcusb_probe(struct usb_interface *interface, - const struct usb_device_id *id) -{ - CPC_USB_T *card = NULL; - CPC_CHAN_T *chan = NULL; - - struct usb_device *udev = interface_to_usbdev(interface); - struct usb_host_interface *iface_desc; - struct usb_endpoint_descriptor *endpoint; - - int i, j, retval = -ENOMEM, slot; - - slot = cpcusb_get_free_slot(); - if (slot < 0) { - info("No more devices supported"); - return -ENOMEM; - } - - /* allocate memory for our device state and initialize it */ - card = kzalloc(sizeof(CPC_USB_T), GFP_KERNEL); - if (!card) { - err("Out of memory"); - return -ENOMEM; - } - CPCUSB_Table[slot] = card; - - /* allocate and initialize the channel struct */ - card->chan = kmalloc(sizeof(CPC_CHAN_T), GFP_KERNEL); - if (!card->chan) { - kfree(card); - err("Out of memory"); - return -ENOMEM; - } - - chan = card->chan; - memset(chan, 0, sizeof(CPC_CHAN_T)); - ResetBuffer(chan); - - init_MUTEX(&card->sem); - spin_lock_init(&card->slock); - - card->udev = udev; - card->interface = interface; - if (udev->descriptor.iSerialNumber) { - usb_string(udev, udev->descriptor.iSerialNumber, card->serialNumber, - 128); - info("Serial %s", card->serialNumber); - } - - card->productId = udev->descriptor.idProduct; - info("Product %s", - card->productId == USB_CPCUSB_LPC2119_PRODUCT_ID ? - "CPC-USB/ARM7" : "CPC-USB/M16C"); - - /* set up the endpoint information */ - /* check out the endpoints */ - /* use only the first bulk-in and bulk-out endpoints */ - iface_desc = &interface->altsetting[0]; - for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { - endpoint = &iface_desc->endpoint[i].desc; - - if (!card->num_intr_in && - (endpoint->bEndpointAddress & USB_DIR_IN) && - ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) - == USB_ENDPOINT_XFER_INT)) { - card->intr_in_urb = usb_alloc_urb(0, GFP_KERNEL); - card->num_intr_in = 1; - - if (!card->intr_in_urb) { - err("No free urbs available"); - goto error; - } - - dbg("intr_in urb %d", card->num_intr_in); - } - - if (!card->num_bulk_in && - (endpoint->bEndpointAddress & USB_DIR_IN) && - ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) - == USB_ENDPOINT_XFER_BULK)) { - card->num_bulk_in = 2; - for (j = 0; j < CPC_USB_URB_CNT; j++) { - card->urbs[j].size = endpoint->wMaxPacketSize; - card->urbs[j].urb = usb_alloc_urb(0, GFP_KERNEL); - if (!card->urbs[j].urb) { - err("No free urbs available"); - goto error; - } - card->urbs[j].buffer = - usb_buffer_alloc(udev, - card->urbs[j].size, - GFP_KERNEL, - &card->urbs[j].urb->transfer_dma); - if (!card->urbs[j].buffer) { - err("Couldn't allocate bulk_in_buffer"); - goto error; - } - } - info("%s - %d reading URB's allocated", - __func__, CPC_USB_URB_CNT); - } - - if (!card->num_bulk_out && - !(endpoint->bEndpointAddress & USB_DIR_IN) && - ((endpoint->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) - == USB_ENDPOINT_XFER_BULK)) { - - card->num_bulk_out = 2; - - for (j = 0; j < CPC_USB_URB_CNT; j++) { - card->wrUrbs[j].size = - endpoint->wMaxPacketSize; - card->wrUrbs[j].urb = - usb_alloc_urb(0, GFP_KERNEL); - if (!card->wrUrbs[j].urb) { - err("No free urbs available"); - goto error; - } - card->wrUrbs[j].buffer = usb_buffer_alloc(udev, - card->wrUrbs[j].size, GFP_KERNEL, - &card->wrUrbs[j].urb->transfer_dma); - - if (!card->wrUrbs[j].buffer) { - err("Couldn't allocate bulk_out_buffer"); - goto error; - } - - usb_fill_bulk_urb(card->wrUrbs[j].urb, udev, - usb_sndbulkpipe(udev, endpoint->bEndpointAddress), - card->wrUrbs[j].buffer, - card->wrUrbs[j].size, - cpcusb_write_bulk_callback, - card); - } - - info("%s - %d writing URB's allocated", __func__, CPC_USB_URB_CNT); - } - } - - if (!(card->num_bulk_in && card->num_bulk_out)) { - err("Couldn't find both bulk-in and bulk-out endpoints"); - goto error; - } - - /* allow device read, write and ioctl */ - card->present = 1; - - /* we can register the device now, as it is ready */ - usb_set_intfdata(interface, card); - retval = usb_register_dev(interface, &cpcusb_class); - - if (retval) { - /* something prevented us from registering this driver */ - err("Not able to get a minor for this device."); - usb_set_intfdata(interface, NULL); - goto error; - } - - card->chan->minor = card->minor = interface->minor; - - chan->buf = vmalloc(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT); - if (chan->buf == NULL) { - err("Out of memory"); - retval = -ENOMEM; - goto error; - } - info("Allocated memory for %d messages (%lu kbytes)", - CPC_MSG_BUF_CNT, (long unsigned int)(sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT) / 1000); - memset(chan->buf, 0, sizeof(CPC_MSG_T) * CPC_MSG_BUF_CNT); - - ResetBuffer(chan); - - card->chan->CPCWait_q = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); - if (!card->chan->CPCWait_q) { - err("Out of memory"); - retval = -ENOMEM; - goto error; - } - init_waitqueue_head(card->chan->CPCWait_q); - - CPCUSB_Table[slot] = card; - card->idx = slot; - CPCUsbCnt++; - - /* let the user know what node this device is now attached to */ - info("Device now attached to USB-%d", card->minor); - return 0; - -error: - for (j = 0; j < CPC_USB_URB_CNT; j++) { - if (card->urbs[j].buffer) { - usb_buffer_free(card->udev, card->urbs[j].size, - card->urbs[j].buffer, - card->urbs[j].urb->transfer_dma); - card->urbs[j].buffer = NULL; - } - if (card->urbs[j].urb) { - usb_free_urb(card->urbs[j].urb); - card->urbs[j].urb = NULL; - } - } - - cpcusb_delete(card); - return retval; -} - -/* - * called by the usb core when the device is removed from the system - */ -static void cpcusb_disconnect(struct usb_interface *interface) -{ - CPC_USB_T *card = NULL; - int minor, j; - - /* prevent races with open() */ - down(&disconnect_sem); - - card = usb_get_intfdata(interface); - usb_set_intfdata(interface, NULL); - - down(&card->sem); - - /* prevent device read, write and ioctl */ - card->present = 0; - - minor = card->minor; - - /* free all urbs and their buffers */ - for (j = 0; j < CPC_USB_URB_CNT; j++) { - /* terminate an ongoing write */ - if (atomic_read(&card->wrUrbs[j].busy)) { - usb_kill_urb(card->wrUrbs[j].urb); - wait_for_completion(&card->wrUrbs[j].finished); - } - usb_buffer_free(card->udev, card->wrUrbs[j].size, - card->wrUrbs[j].buffer, - card->wrUrbs[j].urb->transfer_dma); - usb_free_urb(card->wrUrbs[j].urb); - } - info("%d write URBs freed", CPC_USB_URB_CNT); - - /* free all urbs and their buffers */ - for (j = 0; j < CPC_USB_URB_CNT; j++) { - usb_buffer_free(card->udev, card->urbs[j].size, - card->urbs[j].buffer, - card->urbs[j].urb->transfer_dma); - usb_free_urb(card->urbs[j].urb); - } - info("%d read URBs freed", CPC_USB_URB_CNT); - usb_free_urb(card->intr_in_urb); - - /* give back our minor */ - usb_deregister_dev(interface, &cpcusb_class); - - up(&card->sem); - - /* if the device is opened, cpcusb_release will clean this up */ - if (!card->open) - cpcusb_delete(card); - else - wake_up_interruptible(card->chan->CPCWait_q); - - up(&disconnect_sem); - - CPCUsbCnt--; - info("USB-%d now disconnected", minor); -} - -static int __init CPCUsb_Init(void) -{ - int result, i; - - info(DRIVER_DESC " v" DRIVER_VERSION); - info("Build on " __DATE__ " at " __TIME__); - - for (i = 0; i < CPC_USB_CARD_CNT; i++) - CPCUSB_Table[i] = 0; - - /* register this driver with the USB subsystem */ - result = usb_register(&cpcusb_driver); - if (result) { - err("usb_register failed. Error number %d", result); - return result; - } - - procDir = proc_mkdir(CPC_USB_PROC_DIR, NULL); - if (!procDir) { - err("Could not create proc entry"); - } else { - procEntry = create_proc_read_entry("info", 0444, procDir, - cpcusb_proc_read_info, - NULL); - if (!procEntry) { - err("Could not create proc entry %s", CPC_USB_PROC_DIR "/info"); - remove_proc_entry(CPC_USB_PROC_DIR, NULL); - procDir = NULL; - } - } - - return 0; -} - -static void __exit CPCUsb_Exit(void) -{ - wait_event(rmmodWq, !atomic_read(&useCount)); - - /* deregister this driver with the USB subsystem */ - usb_deregister(&cpcusb_driver); - - if (procDir) { - if (procEntry) - remove_proc_entry("info", procDir); - remove_proc_entry(CPC_USB_PROC_DIR, NULL); - } -} - -module_init(CPCUsb_Init); -module_exit(CPCUsb_Exit); diff --git a/drivers/staging/cpc-usb/cpc.h b/drivers/staging/cpc-usb/cpc.h deleted file mode 100644 index b2fda5d14c1..00000000000 --- a/drivers/staging/cpc-usb/cpc.h +++ /dev/null @@ -1,417 +0,0 @@ -/* - * CPC CAN Interface Definitions - * - * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -#ifndef CPC_HEADER -#define CPC_HEADER - -/* - * the maximum length of the union members within a CPC_MSG - * this value can be defined by the customer, but has to be - * >= 64 bytes - * however, if not defined before, we set a length of 64 byte - */ -#if !defined(CPC_MSG_LEN) || (CPC_MSG_LEN < 64) -#undef CPC_MSG_LEN -#define CPC_MSG_LEN 64 -#endif - -/* - * Transmission of events from CPC interfaces to PC can be individually - * controlled per event type. Default state is: don't transmit - * Control values are constructed by bit-or of Subject and Action - * and passed to CPC_Control() - */ - -/* Control-Values for CPC_Control() Command Subject Selection */ -#define CONTR_CAN_Message 0x04 -#define CONTR_Busload 0x08 -#define CONTR_CAN_State 0x0C -#define CONTR_SendAck 0x10 -#define CONTR_Filter 0x14 -#define CONTR_CmdQueue 0x18 /* reserved, do not use */ -#define CONTR_BusError 0x1C - -/* Control Command Actions */ -#define CONTR_CONT_OFF 0 -#define CONTR_CONT_ON 1 -#define CONTR_SING_ON 2 -/* - * CONTR_SING_ON doesn't change CONTR_CONT_ON state, so it should be - * read as: transmit at least once - */ - -/* defines for confirmed request */ -#define DO_NOT_CONFIRM 0 -#define DO_CONFIRM 1 - -/* event flags */ -#define EVENT_READ 0x01 -#define EVENT_WRITE 0x02 - -/* - * Messages from CPC to PC contain a message object type field. - * The following message types are sent by CPC and can be used in - * handlers, others should be ignored. - */ -#define CPC_MSG_T_RESYNC 0 /* Normally to be ignored */ -#define CPC_MSG_T_CAN 1 /* CAN data frame */ -#define CPC_MSG_T_BUSLOAD 2 /* Busload message */ -#define CPC_MSG_T_STRING 3 /* Normally to be ignored */ -#define CPC_MSG_T_CONTI 4 /* Normally to be ignored */ -#define CPC_MSG_T_MEM 7 /* Normally not to be handled */ -#define CPC_MSG_T_RTR 8 /* CAN remote frame */ -#define CPC_MSG_T_TXACK 9 /* Send acknowledge */ -#define CPC_MSG_T_POWERUP 10 /* Power-up message */ -#define CPC_MSG_T_CMD_NO 11 /* Normally to be ignored */ -#define CPC_MSG_T_CAN_PRMS 12 /* Actual CAN parameters */ -#define CPC_MSG_T_ABORTED 13 /* Command aborted message */ -#define CPC_MSG_T_CANSTATE 14 /* CAN state message */ -#define CPC_MSG_T_RESET 15 /* used to reset CAN-Controller */ -#define CPC_MSG_T_XCAN 16 /* XCAN data frame */ -#define CPC_MSG_T_XRTR 17 /* XCAN remote frame */ -#define CPC_MSG_T_INFO 18 /* information strings */ -#define CPC_MSG_T_CONTROL 19 /* used for control of interface/driver behaviour */ -#define CPC_MSG_T_CONFIRM 20 /* response type for confirmed requests */ -#define CPC_MSG_T_OVERRUN 21 /* response type for overrun conditions */ -#define CPC_MSG_T_KEEPALIVE 22 /* response type for keep alive conditions */ -#define CPC_MSG_T_CANERROR 23 /* response type for bus error conditions */ -#define CPC_MSG_T_DISCONNECTED 24 /* response type for a disconnected interface */ -#define CPC_MSG_T_ERR_COUNTER 25 /* RX/TX error counter of CAN controller */ - -#define CPC_MSG_T_FIRMWARE 100 /* response type for USB firmware download */ - -/* - * Messages from the PC to the CPC interface contain a command field - * Most of the command types are wrapped by the library functions and have therefore - * normally not to be used. - * However, programmers who wish to circumvent the library and talk directly - * to the drivers (mainly Linux programmers) can use the following - * command types: - */ -#define CPC_CMD_T_CAN 1 /* CAN data frame */ -#define CPC_CMD_T_CONTROL 3 /* used for control of interface/driver behaviour */ -#define CPC_CMD_T_CAN_PRMS 6 /* set CAN parameters */ -#define CPC_CMD_T_CLEARBUF 8 /* clears input queue; this is depricated, use CPC_CMD_T_CLEAR_MSG_QUEUE instead */ -#define CPC_CMD_T_INQ_CAN_PARMS 11 /* inquire actual CAN parameters */ -#define CPC_CMD_T_FILTER_PRMS 12 /* set filter parameter */ -#define CPC_CMD_T_RTR 13 /* CAN remote frame */ -#define CPC_CMD_T_CANSTATE 14 /* CAN state message */ -#define CPC_CMD_T_XCAN 15 /* XCAN data frame */ -#define CPC_CMD_T_XRTR 16 /* XCAN remote frame */ -#define CPC_CMD_T_RESET 17 /* used to reset CAN-Controller */ -#define CPC_CMD_T_INQ_INFO 18 /* miscellanous information strings */ -#define CPC_CMD_T_OPEN_CHAN 19 /* open a channel */ -#define CPC_CMD_T_CLOSE_CHAN 20 /* close a channel */ -#define CPC_CMD_T_CNTBUF 21 /* this is depricated, use CPC_CMD_T_INQ_MSG_QUEUE_CNT instead */ -#define CPC_CMD_T_CAN_EXIT 200 /* exit the CAN (disable interrupts; reset bootrate; reset output_cntr; mode = 1) */ - -#define CPC_CMD_T_INQ_MSG_QUEUE_CNT CPC_CMD_T_CNTBUF /* inquires the count of elements in the message queue */ -#define CPC_CMD_T_INQ_ERR_COUNTER 25 /* request the CAN controllers error counter */ -#define CPC_CMD_T_CLEAR_MSG_QUEUE CPC_CMD_T_CLEARBUF /* clear CPC_MSG queue */ -#define CPC_CMD_T_CLEAR_CMD_QUEUE 28 /* clear CPC_CMD queue */ -#define CPC_CMD_T_FIRMWARE 100 /* reserved, must not be used */ -#define CPC_CMD_T_USB_RESET 101 /* reserved, must not be used */ -#define CPC_CMD_T_WAIT_NOTIFY 102 /* reserved, must not be used */ -#define CPC_CMD_T_WAIT_SETUP 103 /* reserved, must not be used */ -#define CPC_CMD_T_ABORT 255 /* Normally not to be used */ - -/* definitions for CPC_MSG_T_INFO information sources */ -#define CPC_INFOMSG_T_UNKNOWN_SOURCE 0 -#define CPC_INFOMSG_T_INTERFACE 1 -#define CPC_INFOMSG_T_DRIVER 2 -#define CPC_INFOMSG_T_LIBRARY 3 - -/* information types */ -#define CPC_INFOMSG_T_UNKNOWN_TYPE 0 -#define CPC_INFOMSG_T_VERSION 1 -#define CPC_INFOMSG_T_SERIAL 2 - -/* definitions for controller types */ -#define PCA82C200 1 /* Philips basic CAN controller, replaced by SJA1000 */ -#define SJA1000 2 /* Philips basic CAN controller */ -#define AN82527 3 /* Intel full CAN controller */ -#define M16C_BASIC 4 /* M16C controller running in basic CAN (not full CAN) mode */ - -/* channel open error codes */ -#define CPC_ERR_NO_FREE_CHANNEL -1 /* no more free space within the channel array */ -#define CPC_ERR_CHANNEL_ALREADY_OPEN -2 /* the channel is already open */ -#define CPC_ERR_CHANNEL_NOT_ACTIVE -3 /* access to a channel not active failed */ -#define CPC_ERR_NO_DRIVER_PRESENT -4 /* no driver at the location searched by the library */ -#define CPC_ERR_NO_INIFILE_PRESENT -5 /* the library could not find the inifile */ -#define CPC_ERR_WRONG_PARAMETERS -6 /* wrong parameters in the inifile */ -#define CPC_ERR_NO_INTERFACE_PRESENT -7 /* 1. The specified interface is not connected */ - /* 2. The interface (mostly CPC-USB) was disconnected upon operation */ -#define CPC_ERR_NO_MATCHING_CHANNEL -8 /* the driver couldn't find a matching channel */ -#define CPC_ERR_NO_BUFFER_AVAILABLE -9 /* the driver couldn't allocate buffer for messages */ -#define CPC_ERR_NO_INTERRUPT -10 /* the requested interrupt couldn't be claimed */ -#define CPC_ERR_NO_MATCHING_INTERFACE -11 /* no interface type related to this channel was found */ -#define CPC_ERR_NO_RESOURCES -12 /* the requested resources could not be claimed */ -#define CPC_ERR_SOCKET -13 /* error concerning TCP sockets */ - -/* init error codes */ -#define CPC_ERR_WRONG_CONTROLLER_TYPE -14 /* wrong CAN controller type within initialization */ -#define CPC_ERR_NO_RESET_MODE -15 /* the controller could not be set into reset mode */ -#define CPC_ERR_NO_CAN_ACCESS -16 /* the CAN controller could not be accessed */ - -/* transmit error codes */ -#define CPC_ERR_CAN_WRONG_ID -20 /* the provided CAN id is too big */ -#define CPC_ERR_CAN_WRONG_LENGTH -21 /* the provided CAN length is too long */ -#define CPC_ERR_CAN_NO_TRANSMIT_BUF -22 /* the transmit buffer was occupied */ -#define CPC_ERR_CAN_TRANSMIT_TIMEOUT -23 /* The message could not be sent within a */ - /* specified time */ - -/* other error codes */ -#define CPC_ERR_SERVICE_NOT_SUPPORTED -30 /* the requested service is not supported by the interface */ -#define CPC_ERR_IO_TRANSFER -31 /* a transmission error down to the driver occurred */ -#define CPC_ERR_TRANSMISSION_FAILED -32 /* a transmission error down to the interface occurred */ -#define CPC_ERR_TRANSMISSION_TIMEOUT -33 /* a timeout occurred within transmission to the interface */ -#define CPC_ERR_OP_SYS_NOT_SUPPORTED -35 /* the operating system is not supported */ -#define CPC_ERR_UNKNOWN -40 /* an unknown error ocurred (mostly IOCTL errors) */ - -#define CPC_ERR_LOADING_DLL -50 /* the library 'cpcwin.dll' could not be loaded */ -#define CPC_ERR_ASSIGNING_FUNCTION -51 /* the specified function could not be assigned */ -#define CPC_ERR_DLL_INITIALIZATION -52 /* the DLL was not initialized correctly */ -#define CPC_ERR_MISSING_LICFILE -55 /* the file containing the licenses does not exist */ -#define CPC_ERR_MISSING_LICENSE -56 /* a required license was not found */ - -/* CAN state bit values. Ignore any bits not listed */ -#define CPC_CAN_STATE_BUSOFF 0x80 -#define CPC_CAN_STATE_ERROR 0x40 - -/* Mask to help ignore undefined bits */ -#define CPC_CAN_STATE_MASK 0xc0 - -/* - * CAN-Message representation in a CPC_MS - * Message object type is CPC_MSG_T_CAN or CPC_MSG_T_RTR - * or CPC_MSG_T_XCAN or CPC_MSG_T_XRTR - */ -typedef struct CPC_CAN_MSG { - u32 id; - u8 length; - u8 msg[8]; -} CPC_CAN_MSG_T; - -/* representation of the CAN parameters for the PCA82C200 controller */ -typedef struct CPC_PCA82C200_PARAMS { - u8 acc_code; /* Acceptance-code for receive, Standard: 0 */ - u8 acc_mask; /* Acceptance-mask for receive, Standard: 0xff (everything) */ - u8 btr0; /* Bus-timing register 0 */ - u8 btr1; /* Bus-timing register 1 */ - u8 outp_contr; /* Output-control register */ -} CPC_PCA82C200_PARAMS_T; - -/* representation of the CAN parameters for the SJA1000 controller */ -typedef struct CPC_SJA1000_PARAMS { - u8 mode; /* enables single or dual acceptance filtering */ - u8 acc_code0; /* Acceptance-code for receive, Standard: 0 */ - u8 acc_code1; - u8 acc_code2; - u8 acc_code3; - u8 acc_mask0; /* Acceptance-mask for receive, Standard: 0xff (everything) */ - u8 acc_mask1; - u8 acc_mask2; - u8 acc_mask3; - u8 btr0; /* Bus-timing register 0 */ - u8 btr1; /* Bus-timing register 1 */ - u8 outp_contr; /* Output-control register */ -} CPC_SJA1000_PARAMS_T; - -/* - * representation of the CAN parameters for the M16C controller - * in basic CAN mode (means no full CAN) - */ -typedef struct CPC_M16C_BASIC_PARAMS { - u8 con0; - u8 con1; - u8 ctlr0; - u8 ctlr1; - u8 clk; - u8 acc_std_code0; - u8 acc_std_code1; - u8 acc_ext_code0; - u8 acc_ext_code1; - u8 acc_ext_code2; - u8 acc_ext_code3; - u8 acc_std_mask0; - u8 acc_std_mask1; - u8 acc_ext_mask0; - u8 acc_ext_mask1; - u8 acc_ext_mask2; - u8 acc_ext_mask3; -} CPC_M16C_BASIC_PARAMS_T; - -/* CAN params message representation */ -typedef struct CPC_CAN_PARAMS { - u8 cc_type; /* represents the controller type */ - union { - CPC_M16C_BASIC_PARAMS_T m16c_basic; - CPC_SJA1000_PARAMS_T sja1000; - CPC_PCA82C200_PARAMS_T pca82c200; - } cc_params; -} CPC_CAN_PARAMS_T; - -/* CHAN init params representation */ -typedef struct CPC_CHAN_PARAMS { - int fd; -} CPC_CHAN_PARAMS_T; - -/* CAN init params message representation */ -typedef struct CPC_INIT_PARAMS { - CPC_CHAN_PARAMS_T chanparams; - CPC_CAN_PARAMS_T canparams; -} CPC_INIT_PARAMS_T; - -/* structure for confirmed message handling */ -typedef struct CPC_CONFIRM { - u8 result; /* error code */ -} CPC_CONFIRM_T; - -/* structure for information requests */ -typedef struct CPC_INFO { - u8 source; /* interface, driver or library */ - u8 type; /* version or serial number */ - char msg[CPC_MSG_LEN - 2]; /* string holding the requested information */ -} CPC_INFO_T; - -/* - * OVERRUN - * In general two types of overrun may occur. - * A hardware overrun, where the CAN controller - * lost a message, because the interrupt was - * not handled before the next messgae comes in. - * Or a software overrun, where i.e. a received - * message could not be stored in the CPC_MSG - * buffer. - */ - -/* After a software overrun has occurred - * we wait until we have CPC_OVR_GAP slots - * free in the CPC_MSG buffer. - */ -#define CPC_OVR_GAP 10 - -/* - * Two types of software overrun may occur. - * A received CAN message or a CAN state event - * can cause an overrun. - * Note: A CPC_CMD which would normally store - * its result immediately in the CPC_MSG - * queue may fail, because the message queue is full. - * This will not generate an overrun message, but - * will halt command execution, until this command - * is able to store its message in the message queue. - */ -#define CPC_OVR_EVENT_CAN 0x01 -#define CPC_OVR_EVENT_CANSTATE 0x02 -#define CPC_OVR_EVENT_BUSERROR 0x04 - -/* - * If the CAN controller lost a message - * we indicate it with the highest bit - * set in the count field. - */ -#define CPC_OVR_HW 0x80 - -/* structure for overrun conditions */ -typedef struct { - u8 event; - u8 count; -} CPC_OVERRUN_T; - -/* - * CAN errors - * Each CAN controller type has different - * registers to record errors. - * Therefor a structure containing the specific - * errors is set up for each controller here - */ - -/* - * SJA1000 error structure - * see the SJA1000 datasheet for detailed - * explanation of the registers - */ -typedef struct CPC_SJA1000_CAN_ERROR { - u8 ecc; /* error capture code register */ - u8 rxerr; /* RX error counter register */ - u8 txerr; /* TX error counter register */ -} CPC_SJA1000_CAN_ERROR_T; - -/* - * M16C error structure - * see the M16C datasheet for detailed - * explanation of the registers - */ -typedef struct CPC_M16C_CAN_ERROR { - u8 tbd; /* to be defined */ -} CPC_M16C_CAN_ERROR_T; - -/* structure for CAN error conditions */ -#define CPC_CAN_ECODE_ERRFRAME 0x01 -typedef struct CPC_CAN_ERROR { - u8 ecode; - struct { - u8 cc_type; /* CAN controller type */ - union { - CPC_SJA1000_CAN_ERROR_T sja1000; - CPC_M16C_CAN_ERROR_T m16c; - } regs; - } cc; -} CPC_CAN_ERROR_T; - -/* - * Structure containing RX/TX error counter. - * This structure is used to request the - * values of the CAN controllers TX and RX - * error counter. - */ -typedef struct CPC_CAN_ERR_COUNTER { - u8 rx; - u8 tx; -} CPC_CAN_ERR_COUNTER_T; - -/* If this flag is set, transmissions from PC to CPC are protected against loss */ -#define CPC_SECURE_TO_CPC 0x01 - -/* If this flag is set, transmissions from CPC to PC are protected against loss */ -#define CPC_SECURE_TO_PC 0x02 - -/* If this flag is set, the CAN-transmit buffer is checked to be free before sending a message */ -#define CPC_SECURE_SEND 0x04 - -/* - * If this flag is set, the transmission complete flag is checked - * after sending a message - * THIS IS CURRENTLY ONLY IMPLEMENTED IN THE PASSIVE INTERFACE DRIVERS - */ -#define CPC_SECURE_TRANSMIT 0x08 - -/* main message type used between library and application */ -typedef struct CPC_MSG { - u8 type; /* type of message */ - u8 length; /* length of data within union 'msg' */ - u8 msgid; /* confirmation handle */ - u32 ts_sec; /* timestamp in seconds */ - u32 ts_nsec; /* timestamp in nano seconds */ - union { - u8 generic[CPC_MSG_LEN]; - CPC_CAN_MSG_T canmsg; - CPC_CAN_PARAMS_T canparams; - CPC_CONFIRM_T confirmation; - CPC_INFO_T info; - CPC_OVERRUN_T overrun; - CPC_CAN_ERROR_T error; - CPC_CAN_ERR_COUNTER_T err_counter; - u8 busload; - u8 canstate; - } msg; -} CPC_MSG_T; - -#endif /* CPC_HEADER */ diff --git a/drivers/staging/cpc-usb/cpc_int.h b/drivers/staging/cpc-usb/cpc_int.h deleted file mode 100644 index 38674e9690a..00000000000 --- a/drivers/staging/cpc-usb/cpc_int.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * CPCLIB - * - * Copyright (C) 2000-2008 EMS Dr. Thomas Wuensche - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - */ -#ifndef CPC_INT_H -#define CPC_INT_H - -#include <linux/wait.h> - -#define CPC_MSG_BUF_CNT 1500 - -#define CPC_PROC_DIR "driver/" - -#undef dbg -#undef err -#undef info - -/* Use our own dbg macro */ -#define dbg(format, arg...) do { if (debug) printk( KERN_INFO format "\n" , ## arg); } while (0) -#define err(format, arg...) do { printk( KERN_INFO "ERROR " format "\n" , ## arg); } while (0) -#define info(format, arg...) do { printk( KERN_INFO format "\n" , ## arg); } while (0) - -/* Macros help using of our buffers */ -#define IsBufferFull(x) (!(x)->WnR) && ((x)->iidx == (x)->oidx) -#define IsBufferEmpty(x) ((x)->WnR) && ((x)->iidx == (x)->oidx) -#define IsBufferNotEmpty(x) (!(x)->WnR) || ((x)->iidx != (x)->oidx) -#define ResetBuffer(x) do { (x)->oidx = (x)->iidx=0; (x)->WnR = 1; } while(0); - -#define CPC_BufWriteAllowed ((chan->oidx != chan->iidx) || chan->WnR) - -typedef void (*chan_write_byte_t) (void *chan, unsigned int reg, - unsigned char val); -typedef unsigned char (*chan_read_byte_t) (void *chan, unsigned int reg); - -typedef struct CPC_CHAN { - void __iomem * canBase; /* base address of SJA1000 */ - chan_read_byte_t read_byte; /* CAN controller read access routine */ - chan_write_byte_t write_byte; /* CAN controller write access routine */ - CPC_MSG_T *buf; /* buffer for CPC msg */ - unsigned int iidx; - unsigned int oidx; - unsigned int WnR; - unsigned int minor; - unsigned int locked; - unsigned int irqDisabled; - - unsigned char cpcCtrlCANMessage; - unsigned char cpcCtrlCANState; - unsigned char cpcCtrlBUSState; - - unsigned char controllerType; - - unsigned long ovrTimeSec; - unsigned long ovrTimeNSec; - unsigned long ovrLockedBuffer; - CPC_OVERRUN_T ovr; - - /* for debugging only */ - unsigned int handledIrqs; - unsigned int lostMessages; - - unsigned int sentStdCan; - unsigned int sentExtCan; - unsigned int sentStdRtr; - unsigned int sentExtRtr; - - unsigned int recvStdCan; - unsigned int recvExtCan; - unsigned int recvStdRtr; - unsigned int recvExtRtr; - - wait_queue_head_t *CPCWait_q; - - void *private; -} CPC_CHAN_T; - -#endif diff --git a/drivers/staging/cpc-usb/cpcusb.h b/drivers/staging/cpc-usb/cpcusb.h deleted file mode 100644 index 6bdf30be239..00000000000 --- a/drivers/staging/cpc-usb/cpcusb.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Header for CPC-USB Driver ******************** - * Copyright 1999, 2000, 2001 - * - * Company: EMS Dr. Thomas Wuensche - * Sonnenhang 3 - * 85304 Ilmmuenster - * Phone: +49-8441-490260 - * Fax: +49-8441-81860 - * email: support@ems-wuensche.com - * WWW: www.ems-wuensche.com - */ - -#ifndef CPCUSB_H -#define CPCUSB_H - -#undef err -#undef dbg -#undef info - -/* Use our own dbg macro */ -#define dbg(format, arg...) do { if (debug) printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0) -#define info(format, arg...) do { printk(KERN_INFO "CPC-USB: " format "\n" , ## arg); } while (0) -#define err(format, arg...) do { printk(KERN_INFO "CPC-USB(ERROR): " format "\n" , ## arg); } while (0) - -#define CPC_USB_CARD_CNT 4 - -typedef struct CPC_USB_READ_URB { - unsigned char *buffer; /* the buffer to send data */ - size_t size; /* the size of the send buffer */ - struct urb *urb; /* the urb used to send data */ -} CPC_USB_READ_URB_T; - -typedef struct CPC_USB_WRITE_URB { - unsigned char *buffer; /* the buffer to send data */ - size_t size; /* the size of the send buffer */ - struct urb *urb; /* the urb used to send data */ - atomic_t busy; /* true if write urb is busy */ - struct completion finished; /* wait for the write to finish */ -} CPC_USB_WRITE_URB_T; - -#define CPC_USB_URB_CNT 10 - -typedef struct CPC_USB { - struct usb_device *udev; /* save off the usb device pointer */ - struct usb_interface *interface; /* the interface for this device */ - unsigned char minor; /* the starting minor number for this device */ - unsigned char num_ports; /* the number of ports this device has */ - int num_intr_in; /* number of interrupt in endpoints we have */ - int num_bulk_in; /* number of bulk in endpoints we have */ - int num_bulk_out; /* number of bulk out endpoints we have */ - - CPC_USB_READ_URB_T urbs[CPC_USB_URB_CNT]; - - unsigned char intr_in_buffer[4]; /* interrupt transfer buffer */ - struct urb *intr_in_urb; /* interrupt transfer urb */ - - CPC_USB_WRITE_URB_T wrUrbs[CPC_USB_URB_CNT]; - - int open; /* if the port is open or not */ - int present; /* if the device is not disconnected */ - struct semaphore sem; /* locks this structure */ - - int free_slots; /* free send slots of CPC-USB */ - int idx; - - spinlock_t slock; - - char serialNumber[128]; /* serial number */ - int productId; /* product id to differ between M16C and LPC2119 */ - CPC_CHAN_T *chan; -} CPC_USB_T; - -#define CPCTable CPCUSB_Table - -#define CPC_DRIVER_VERSION "0.724" -#define CPC_DRIVER_SERIAL "not applicable" - -#define OBUF_SIZE 255 /* 4096 */ - -/* read timeouts -- RD_NAK_TIMEOUT * RD_EXPIRE = Number of seconds */ -#define RD_NAK_TIMEOUT (10*HZ) /* Default number of X seconds to wait */ -#define RD_EXPIRE 12 /* Number of attempts to wait X seconds */ - -#define CPC_USB_BASE_MNR 0 /* CPC-USB start at minor 0 */ - -#endif diff --git a/drivers/staging/cpc-usb/sja2m16c.h b/drivers/staging/cpc-usb/sja2m16c.h deleted file mode 100644 index 654bd3fc91d..00000000000 --- a/drivers/staging/cpc-usb/sja2m16c.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _SJA2M16C_H -#define _SJA2M16C_H - -#include "cpc.h" - -#define BAUDRATE_TOLERANCE_PERCENT 1 -#define SAMPLEPOINT_TOLERANCE_PERCENT 5 -#define SAMPLEPOINT_UPPER_LIMIT 88 - -/* M16C parameters */ -struct FIELD_C0CONR { - unsigned int brp:4; - unsigned int sam:1; - unsigned int pr:3; - unsigned int dummy:8; -}; -struct FIELD_C1CONR { - unsigned int ph1:3; - unsigned int ph2:3; - unsigned int sjw:2; - unsigned int dummy:8; -}; -typedef union C0CONR { - unsigned char c0con; - struct FIELD_C0CONR bc0con; -} C0CONR_T; -typedef union C1CONR { - unsigned char c1con; - struct FIELD_C1CONR bc1con; -} C1CONR_T; - -#define SJA_TSEG1 ((pParams->btr1 & 0x0f)+1) -#define SJA_TSEG2 (((pParams->btr1 & 0x70)>>4)+1) -#define SJA_BRP ((pParams->btr0 & 0x3f)+1) -#define SJA_SJW ((pParams->btr0 & 0xc0)>>6) -#define SJA_SAM ((pParams->btr1 & 0x80)>>7) -int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2); -int samplepoint_m16c(int brp, int pr, int ph1, int ph2); -int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T *pMsg); - -#endif diff --git a/drivers/staging/cpc-usb/sja2m16c_2.c b/drivers/staging/cpc-usb/sja2m16c_2.c deleted file mode 100644 index bf0230fb778..00000000000 --- a/drivers/staging/cpc-usb/sja2m16c_2.c +++ /dev/null @@ -1,452 +0,0 @@ -/**************************************************************************** -* -* Copyright (c) 2003,2004 by EMS Dr. Thomas Wuensche -* -* - All rights reserved - -* -* This code is provided "as is" without warranty of any kind, either -* expressed or implied, including but not limited to the liability -* concerning the freedom from material defects, the fitness for parti- -* cular purposes or the freedom of proprietary rights of third parties. -* -***************************************************************************** -* Module name.: cpcusb -***************************************************************************** -* Include file: cpc.h -***************************************************************************** -* Project.....: Windows Driver Development Kit -* Filename....: sja2m16c.cpp -* Authors.....: (GU) Gerhard Uttenthaler -* (CS) Christian Schoett -***************************************************************************** -* Short descr.: converts baudrate between SJA1000 and M16C -***************************************************************************** -* Description.: handles the baudrate conversion from SJA1000 parameters to -* M16C parameters -***************************************************************************** -* Address : EMS Dr. Thomas Wuensche -* Sonnenhang 3 -* D-85304 Ilmmuenster -* Tel. : +49-8441-490260 -* Fax. : +49-8441-81860 -* email: support@ems-wuensche.com -***************************************************************************** -* History -***************************************************************************** -* Version Date Auth Remark -* -* 01.00 ?? GU - initial release -* 01.10 ?????????? CS - adapted to fit into the USB Windows driver -* 02.00 18.08.2004 GU - improved the baudrate calculating algorithm -* - implemented acceptance filtering -* 02.10 10.09.2004 CS - adapted to fit into the USB Windows driver -***************************************************************************** -* ToDo's -***************************************************************************** -*/ - -/****************************************************************************/ -/* I N C L U D E S -*/ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/module.h> -#include <linux/poll.h> -#include <linux/smp_lock.h> -#include <linux/completion.h> -#include <asm/uaccess.h> -#include <linux/usb.h> - -#include "cpc.h" -#include "cpc_int.h" -#include "cpcusb.h" - -#include "sja2m16c.h" - -/*********************************************************************/ -int baudrate_m16c(int clk, int brp, int pr, int ph1, int ph2) -{ - return (16000000 / (1 << clk)) / 2 / (brp + 1) / (1 + pr + 1 + - ph1 + 1 + ph2 + - 1); -} - - -/*********************************************************************/ -int samplepoint_m16c(int brp, int pr, int ph1, int ph2) -{ - return (100 * (1 + pr + 1 + ph1 + 1)) / (1 + pr + 1 + ph1 + 1 + - ph2 + 1); -} - - -/**************************************************************************** -* Function.....: SJA1000_TO_M16C_BASIC_Params -* -* Task.........: This routine converts SJA1000 CAN btr parameters into M16C -* parameters based on the sample point and the error. In -* addition it converts the acceptance filter parameters to -* suit the M16C parameters -* -* Parameters...: None -* -* Return values: None -* -* Comments.....: -***************************************************************************** -* History -***************************************************************************** -* 19.01.2005 CS - modifed the conversion of SJA1000 filter params into -* M16C params. Due to compatibility reasons with the -* older 82C200 CAN controller the SJA1000 -****************************************************************************/ -int SJA1000_TO_M16C_BASIC_Params(CPC_MSG_T * in) -{ - int sjaBaudrate; - int sjaSamplepoint; - int *baudrate_error; // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7] - int *samplepoint_error; // BRP[0..15], PR[0..7], PH1[0..7], PH2[0..7] - int baudrate_error_merk; - int clk, brp, pr, ph1, ph2; - int clk_merk, brp_merk, pr_merk, ph1_merk, ph2_merk; - int index; - unsigned char acc_code0, acc_code1, acc_code2, acc_code3; - unsigned char acc_mask0, acc_mask1, acc_mask2, acc_mask3; - CPC_MSG_T * out; - C0CONR_T c0con; - C1CONR_T c1con; - int tmpAccCode; - int tmpAccMask; - - // we have to convert the parameters into M16C parameters - CPC_SJA1000_PARAMS_T * pParams; - - // check if the type is CAN parameters and if we have to convert the given params - if (in->type != CPC_CMD_T_CAN_PRMS - || in->msg.canparams.cc_type != SJA1000) - return 0; - pParams = - (CPC_SJA1000_PARAMS_T *) & in->msg.canparams.cc_params.sja1000; - acc_code0 = pParams->acc_code0; - acc_code1 = pParams->acc_code1; - acc_code2 = pParams->acc_code2; - acc_code3 = pParams->acc_code3; - acc_mask0 = pParams->acc_mask0; - acc_mask1 = pParams->acc_mask1; - acc_mask2 = pParams->acc_mask2; - acc_mask3 = pParams->acc_mask3; - -#ifdef _DEBUG_OUTPUT_CAN_PARAMS - info("acc_code0: %2.2Xh\n", acc_code0); - info("acc_code1: %2.2Xh\n", acc_code1); - info("acc_code2: %2.2Xh\n", acc_code2); - info("acc_code3: %2.2Xh\n", acc_code3); - info("acc_mask0: %2.2Xh\n", acc_mask0); - info("acc_mask1: %2.2Xh\n", acc_mask1); - info("acc_mask2: %2.2Xh\n", acc_mask2); - info("acc_mask3: %2.2Xh\n", acc_mask3); - -#endif /* */ - if (! - (baudrate_error = - (int *) vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5))) { - err("Could not allocate memory\n"); - return -3; - } - if (! - (samplepoint_error = - (int *) vmalloc(sizeof(int) * 16 * 8 * 8 * 8 * 5))) { - err("Could not allocate memory\n"); - vfree(baudrate_error); - return -3; - } - memset(baudrate_error, 0xff, sizeof(baudrate_error)); - memset(samplepoint_error, 0xff, sizeof(baudrate_error)); - sjaBaudrate = - 16000000 / 2 / SJA_BRP / (1 + SJA_TSEG1 + SJA_TSEG2); - sjaSamplepoint = - 100 * (1 + SJA_TSEG1) / (1 + SJA_TSEG1 + SJA_TSEG2); - if (sjaBaudrate == 0) { - vfree(baudrate_error); - vfree(samplepoint_error); - return -2; - } - -#ifdef _DEBUG_OUTPUT_CAN_PARAMS - info("\nStarting SJA CAN params\n"); - info("-------------------------\n"); - info("TS1 : %2.2Xh TS2 : %2.2Xh\n", SJA_TSEG1, SJA_TSEG2); - info("BTR0 : %2.2Xh BTR1: %2.2Xh\n", pParams->btr0, - pParams->btr1); - info("Baudrate: %d.%dkBaud\n", sjaBaudrate / 1000, - sjaBaudrate % 1000); - info("Sample P: 0.%d\n", sjaSamplepoint); - info("\n"); - -#endif /* */ - c0con.bc0con.sam = SJA_SAM; - c1con.bc1con.sjw = SJA_SJW; - - // calculate errors for all baudrates - index = 0; - for (clk = 0; clk < 5; clk++) { - for (brp = 0; brp < 16; brp++) { - for (pr = 0; pr < 8; pr++) { - for (ph1 = 0; ph1 < 8; ph1++) { - for (ph2 = 0; ph2 < 8; ph2++) { - baudrate_error[index] = - 100 * - abs(baudrate_m16c - (clk, brp, pr, ph1, - ph2) - - sjaBaudrate) / - sjaBaudrate; - samplepoint_error[index] = - abs(samplepoint_m16c - (brp, pr, ph1, - ph2) - - sjaSamplepoint); - -#if 0 - info - ("Baudrate : %d kBaud\n", - baudrate_m16c(clk, - brp, pr, - ph1, - ph2)); - info - ("Baudrate Error: %d\n", - baudrate_error - [index]); - info - ("Sample P Error: %d\n", - samplepoint_error - [index]); - info - ("clk : %d\n", - clk); - -#endif /* */ - index++; - } - } - } - } - } - - // mark all baudrate_error entries which are outer limits - index = 0; - for (clk = 0; clk < 5; clk++) { - for (brp = 0; brp < 16; brp++) { - for (pr = 0; pr < 8; pr++) { - for (ph1 = 0; ph1 < 8; ph1++) { - for (ph2 = 0; ph2 < 8; ph2++) { - if ((baudrate_error[index] - > - BAUDRATE_TOLERANCE_PERCENT) - || - (samplepoint_error - [index] > - SAMPLEPOINT_TOLERANCE_PERCENT) - || - (samplepoint_m16c - (brp, pr, ph1, - ph2) > - SAMPLEPOINT_UPPER_LIMIT)) - { - baudrate_error - [index] = -1; - } else - if (((1 + pr + 1 + - ph1 + 1 + ph2 + - 1) < 8) - || - ((1 + pr + 1 + - ph1 + 1 + ph2 + - 1) > 25)) { - baudrate_error - [index] = -1; - } - -#if 0 - else { - info - ("Baudrate : %d kBaud\n", - baudrate_m16c - (clk, brp, pr, - ph1, ph2)); - info - ("Baudrate Error: %d\n", - baudrate_error - [index]); - info - ("Sample P Error: %d\n", - samplepoint_error - [index]); - } - -#endif /* */ - index++; - } - } - } - } - } - - // find list of minimum of baudrate_error within unmarked entries - clk_merk = brp_merk = pr_merk = ph1_merk = ph2_merk = 0; - baudrate_error_merk = 100; - index = 0; - for (clk = 0; clk < 5; clk++) { - for (brp = 0; brp < 16; brp++) { - for (pr = 0; pr < 8; pr++) { - for (ph1 = 0; ph1 < 8; ph1++) { - for (ph2 = 0; ph2 < 8; ph2++) { - if (baudrate_error[index] - != -1) { - if (baudrate_error - [index] < - baudrate_error_merk) - { - baudrate_error_merk - = - baudrate_error - [index]; - brp_merk = - brp; - pr_merk = - pr; - ph1_merk = - ph1; - ph2_merk = - ph2; - clk_merk = - clk; - -#if 0 - info - ("brp: %2.2Xh pr: %2.2Xh ph1: %2.2Xh ph2: %2.2Xh\n", - brp, - pr, - ph1, - ph2); - info - ("Baudrate : %d kBaud\n", - baudrate_m16c - (clk, - brp, - pr, - ph1, - ph2)); - info - ("Baudrate Error: %d\n", - baudrate_error - [index]); - info - ("Sample P Error: %d\n", - samplepoint_error - [index]); - -#endif /* */ - } - } - index++; - } - } - } - } - } - if (baudrate_error_merk == 100) { - info("ERROR: Could not convert CAN init parameter\n"); - vfree(baudrate_error); - vfree(samplepoint_error); - return -1; - } - - // setting m16c CAN parameter - c0con.bc0con.brp = brp_merk; - c0con.bc0con.pr = pr_merk; - c1con.bc1con.ph1 = ph1_merk; - c1con.bc1con.ph2 = ph2_merk; - -#ifdef _DEBUG_OUTPUT_CAN_PARAMS - info("\nResulting M16C CAN params\n"); - info("-------------------------\n"); - info("clk : %2.2Xh\n", clk_merk); - info("ph1 : %2.2Xh ph2: %2.2Xh\n", c1con.bc1con.ph1 + 1, - c1con.bc1con.ph2 + 1); - info("pr : %2.2Xh brp: %2.2Xh\n", c0con.bc0con.pr + 1, - c0con.bc0con.brp + 1); - info("sjw : %2.2Xh sam: %2.2Xh\n", c1con.bc1con.sjw, - c0con.bc0con.sam); - info("co1 : %2.2Xh co0: %2.2Xh\n", c1con.c1con, c0con.c0con); - info("Baudrate: %d.%dBaud\n", - baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr, - c1con.bc1con.ph1, c1con.bc1con.ph2) / 1000, - baudrate_m16c(clk_merk, c0con.bc0con.brp, c0con.bc0con.pr, - c1con.bc1con.ph1, c1con.bc1con.ph2) % 1000); - info("Sample P: 0.%d\n", - samplepoint_m16c(c0con.bc0con.brp, c0con.bc0con.pr, - c1con.bc1con.ph1, c1con.bc1con.ph2)); - info("\n"); - -#endif /* */ - out = in; - out->type = 6; - out->length = sizeof(CPC_M16C_BASIC_PARAMS_T) + 1; - out->msg.canparams.cc_type = M16C_BASIC; - out->msg.canparams.cc_params.m16c_basic.con0 = c0con.c0con; - out->msg.canparams.cc_params.m16c_basic.con1 = c1con.c1con; - out->msg.canparams.cc_params.m16c_basic.ctlr0 = 0x4C; - out->msg.canparams.cc_params.m16c_basic.ctlr1 = 0x00; - out->msg.canparams.cc_params.m16c_basic.clk = clk_merk; - out->msg.canparams.cc_params.m16c_basic.acc_std_code0 = - acc_code0; - out->msg.canparams.cc_params.m16c_basic.acc_std_code1 = acc_code1; - -// info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1); - tmpAccCode = (acc_code1 >> 5) + (acc_code0 << 3); - out->msg.canparams.cc_params.m16c_basic.acc_std_code0 = - (unsigned char) tmpAccCode; - out->msg.canparams.cc_params.m16c_basic.acc_std_code1 = - (unsigned char) (tmpAccCode >> 8); - -// info("code0: 0x%2.2X, code1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_code0, out->msg.canparams.cc_params.m16c_basic.acc_std_code1); - out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 = - ~acc_mask0; - out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 = - ~acc_mask1; - -// info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1); - tmpAccMask = ((acc_mask1) >> 5) + ((acc_mask0) << 3); - -// info("tmpAccMask: 0x%4.4X\n", tmpAccMask); - out->msg.canparams.cc_params.m16c_basic.acc_std_mask0 = - (unsigned char) ~tmpAccMask; - out->msg.canparams.cc_params.m16c_basic.acc_std_mask1 = - (unsigned char) ~(tmpAccMask >> 8); - -// info("mask0: 0x%2.2X, mask1: 0x%2.2X\n", out->msg.canparams.cc_params.m16c_basic.acc_std_mask0, out->msg.canparams.cc_params.m16c_basic.acc_std_mask1); - out->msg.canparams.cc_params.m16c_basic.acc_ext_code0 = - (unsigned char) tmpAccCode; - out->msg.canparams.cc_params.m16c_basic.acc_ext_code1 = - (unsigned char) (tmpAccCode >> 8); - out->msg.canparams.cc_params.m16c_basic.acc_ext_code2 = acc_code2; - out->msg.canparams.cc_params.m16c_basic.acc_ext_code3 = acc_code3; - out->msg.canparams.cc_params.m16c_basic.acc_ext_mask0 = - (unsigned char) ~tmpAccMask; - out->msg.canparams.cc_params.m16c_basic.acc_ext_mask1 = - (unsigned char) ~(tmpAccMask >> 8); - out->msg.canparams.cc_params.m16c_basic.acc_ext_mask2 = - ~acc_mask2; - out->msg.canparams.cc_params.m16c_basic.acc_ext_mask3 = - ~acc_mask3; - vfree(baudrate_error); - vfree(samplepoint_error); - return 0; -} - - diff --git a/drivers/staging/go7007/Makefile b/drivers/staging/go7007/Makefile index d14ea84a01f..1301caa7495 100644 --- a/drivers/staging/go7007/Makefile +++ b/drivers/staging/go7007/Makefile @@ -32,8 +32,3 @@ endif EXTRA_CFLAGS += -Idrivers/media/dvb/frontends EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core - -# Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too -ifeq ($(CONFIG_SND),) -EXTRA_CFLAGS += -include sound/config.h -endif diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig index ebd7237230e..240750881d2 100644 --- a/drivers/usb/Kconfig +++ b/drivers/usb/Kconfig @@ -22,7 +22,6 @@ config USB_ARCH_HAS_HCD default y if PCMCIA && !M32R # sl811_cs default y if ARM # SL-811 default y if SUPERH # r8a66597-hcd - default y if MICROBLAZE default PCI # many non-PCI SOC chips embed OHCI diff --git a/drivers/usb/gadget/f_loopback.c b/drivers/usb/gadget/f_loopback.c index eb6ddfc2085..6cb29d3df57 100644 --- a/drivers/usb/gadget/f_loopback.c +++ b/drivers/usb/gadget/f_loopback.c @@ -22,7 +22,6 @@ /* #define VERBOSE_DEBUG */ #include <linux/kernel.h> -#include <linux/utsname.h> #include <linux/device.h> #include "g_zero.h" diff --git a/drivers/usb/gadget/f_obex.c b/drivers/usb/gadget/f_obex.c index 46d6266f30e..b4a3ba654ea 100644 --- a/drivers/usb/gadget/f_obex.c +++ b/drivers/usb/gadget/f_obex.c @@ -24,7 +24,6 @@ /* #define VERBOSE_DEBUG */ #include <linux/kernel.h> -#include <linux/utsname.h> #include <linux/device.h> #include "u_serial.h" diff --git a/drivers/usb/gadget/f_sourcesink.c b/drivers/usb/gadget/f_sourcesink.c index bffe91d525f..09cba273d2d 100644 --- a/drivers/usb/gadget/f_sourcesink.c +++ b/drivers/usb/gadget/f_sourcesink.c @@ -22,7 +22,6 @@ /* #define VERBOSE_DEBUG */ #include <linux/kernel.h> -#include <linux/utsname.h> #include <linux/device.h> #include "g_zero.h" diff --git a/drivers/usb/gadget/u_audio.c b/drivers/usb/gadget/u_audio.c index b5200d55145..8252595d619 100644 --- a/drivers/usb/gadget/u_audio.c +++ b/drivers/usb/gadget/u_audio.c @@ -10,7 +10,6 @@ */ #include <linux/kernel.h> -#include <linux/utsname.h> #include <linux/device.h> #include <linux/delay.h> #include <linux/ctype.h> diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index f8751ff863c..2fc02bd9584 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -23,7 +23,6 @@ /* #define VERBOSE_DEBUG */ #include <linux/kernel.h> -#include <linux/utsname.h> #include <linux/device.h> #include <linux/ctype.h> #include <linux/etherdevice.h> diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 68fa0e43b78..8c075b2416b 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -912,6 +912,7 @@ static void sierra_release(struct usb_serial *serial) } } +#ifdef CONFIG_PM static void stop_read_write_urbs(struct usb_serial *serial) { int i, j; @@ -988,6 +989,10 @@ static int sierra_resume(struct usb_serial *serial) return ec ? -EIO : 0; } +#else +#define sierra_suspend NULL +#define sierra_resume NULL +#endif static struct usb_serial_driver sierra_device = { .driver = { diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c index ba3d71f5c7d..9554ad5f9af 100644 --- a/drivers/vlynq/vlynq.c +++ b/drivers/vlynq/vlynq.c @@ -702,7 +702,7 @@ static int vlynq_probe(struct platform_device *pdev) dev->mem_start = mem_res->start; dev->mem_end = mem_res->end; - len = regs_res->end - regs_res->start; + len = resource_size(regs_res); if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) { printk(KERN_ERR "%s: Can't request vlynq registers\n", dev_name(&dev->dev)); diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 74e0723e90b..795233702a4 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -8,3 +8,12 @@ config 9P_FS See <http://v9fs.sf.net> for more information. If unsure, say N. + +config 9P_FSCACHE + bool "Enable 9P client caching support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on 9P_FS=m && FSCACHE || 9P_FS=y && FSCACHE=y + help + Choose Y here to enable persistent, read-only local + caching support for 9p clients using FS-Cache + diff --git a/fs/9p/Makefile b/fs/9p/Makefile index bc7f0d1551e..1a940ec7af6 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -8,5 +8,6 @@ obj-$(CONFIG_9P_FS) := 9p.o vfs_dir.o \ vfs_dentry.o \ v9fs.o \ - fid.o \ + fid.o +9p-$(CONFIG_9P_FSCACHE) += cache.o diff --git a/fs/9p/cache.c b/fs/9p/cache.c new file mode 100644 index 00000000000..51c94e26a34 --- /dev/null +++ b/fs/9p/cache.c @@ -0,0 +1,474 @@ +/* + * V9FS cache definitions. + * + * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/jiffies.h> +#include <linux/file.h> +#include <linux/stat.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <net/9p/9p.h> + +#include "v9fs.h" +#include "cache.h" + +#define CACHETAG_LEN 11 + +struct kmem_cache *vcookie_cache; + +struct fscache_netfs v9fs_cache_netfs = { + .name = "9p", + .version = 0, +}; + +static void init_once(void *foo) +{ + struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo; + vcookie->fscache = NULL; + vcookie->qid = NULL; + inode_init_once(&vcookie->inode); +} + +/** + * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain + * vcookie to inode mapping + * + * Returns 0 on success. + */ + +static int v9fs_init_vcookiecache(void) +{ + vcookie_cache = kmem_cache_create("vcookie_cache", + sizeof(struct v9fs_cookie), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once); + if (!vcookie_cache) + return -ENOMEM; + + return 0; +} + +/** + * v9fs_destroy_vcookiecache - destroy the cache of vcookies + * + */ + +static void v9fs_destroy_vcookiecache(void) +{ + kmem_cache_destroy(vcookie_cache); +} + +int __v9fs_cache_register(void) +{ + int ret; + ret = v9fs_init_vcookiecache(); + if (ret < 0) + return ret; + + return fscache_register_netfs(&v9fs_cache_netfs); +} + +void __v9fs_cache_unregister(void) +{ + v9fs_destroy_vcookiecache(); + fscache_unregister_netfs(&v9fs_cache_netfs); +} + +/** + * v9fs_random_cachetag - Generate a random tag to be associated + * with a new cache session. + * + * The value of jiffies is used for a fairly randomly cache tag. + */ + +static +int v9fs_random_cachetag(struct v9fs_session_info *v9ses) +{ + v9ses->cachetag = kmalloc(CACHETAG_LEN, GFP_KERNEL); + if (!v9ses->cachetag) + return -ENOMEM; + + return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies); +} + +static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + struct v9fs_session_info *v9ses; + uint16_t klen = 0; + + v9ses = (struct v9fs_session_info *)cookie_netfs_data; + P9_DPRINTK(P9_DEBUG_FSC, "session %p buf %p size %u", v9ses, + buffer, bufmax); + + if (v9ses->cachetag) + klen = strlen(v9ses->cachetag); + + if (klen > bufmax) + return 0; + + memcpy(buffer, v9ses->cachetag, klen); + P9_DPRINTK(P9_DEBUG_FSC, "cache session tag %s", v9ses->cachetag); + return klen; +} + +const struct fscache_cookie_def v9fs_cache_session_index_def = { + .name = "9P.session", + .type = FSCACHE_COOKIE_TYPE_INDEX, + .get_key = v9fs_cache_session_get_key, +}; + +void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) +{ + /* If no cache session tag was specified, we generate a random one. */ + if (!v9ses->cachetag) + v9fs_random_cachetag(v9ses); + + v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, + &v9fs_cache_session_index_def, + v9ses); + P9_DPRINTK(P9_DEBUG_FSC, "session %p get cookie %p", v9ses, + v9ses->fscache); +} + +void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) +{ + P9_DPRINTK(P9_DEBUG_FSC, "session %p put cookie %p", v9ses, + v9ses->fscache); + fscache_relinquish_cookie(v9ses->fscache, 0); + v9ses->fscache = NULL; +} + + +static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, + void *buffer, uint16_t bufmax) +{ + const struct v9fs_cookie *vcookie = cookie_netfs_data; + memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path)); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode, + vcookie->qid->path); + return sizeof(vcookie->qid->path); +} + +static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, + uint64_t *size) +{ + const struct v9fs_cookie *vcookie = cookie_netfs_data; + *size = i_size_read(&vcookie->inode); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode, + *size); +} + +static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, + void *buffer, uint16_t buflen) +{ + const struct v9fs_cookie *vcookie = cookie_netfs_data; + memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version)); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode, + vcookie->qid->version); + return sizeof(vcookie->qid->version); +} + +static enum +fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data, + const void *buffer, + uint16_t buflen) +{ + const struct v9fs_cookie *vcookie = cookie_netfs_data; + + if (buflen != sizeof(vcookie->qid->version)) + return FSCACHE_CHECKAUX_OBSOLETE; + + if (memcmp(buffer, &vcookie->qid->version, + sizeof(vcookie->qid->version))) + return FSCACHE_CHECKAUX_OBSOLETE; + + return FSCACHE_CHECKAUX_OKAY; +} + +static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) +{ + struct v9fs_cookie *vcookie = cookie_netfs_data; + struct pagevec pvec; + pgoff_t first; + int loop, nr_pages; + + pagevec_init(&pvec, 0); + first = 0; + + for (;;) { + nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping, + first, + PAGEVEC_SIZE - pagevec_count(&pvec)); + if (!nr_pages) + break; + + for (loop = 0; loop < nr_pages; loop++) + ClearPageFsCache(pvec.pages[loop]); + + first = pvec.pages[nr_pages - 1]->index + 1; + + pvec.nr = nr_pages; + pagevec_release(&pvec); + cond_resched(); + } +} + +const struct fscache_cookie_def v9fs_cache_inode_index_def = { + .name = "9p.inode", + .type = FSCACHE_COOKIE_TYPE_DATAFILE, + .get_key = v9fs_cache_inode_get_key, + .get_attr = v9fs_cache_inode_get_attr, + .get_aux = v9fs_cache_inode_get_aux, + .check_aux = v9fs_cache_inode_check_aux, + .now_uncached = v9fs_cache_inode_now_uncached, +}; + +void v9fs_cache_inode_get_cookie(struct inode *inode) +{ + struct v9fs_cookie *vcookie; + struct v9fs_session_info *v9ses; + + if (!S_ISREG(inode->i_mode)) + return; + + vcookie = v9fs_inode2cookie(inode); + if (vcookie->fscache) + return; + + v9ses = v9fs_inode2v9ses(inode); + vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, + &v9fs_cache_inode_index_def, + vcookie); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, + vcookie->fscache); +} + +void v9fs_cache_inode_put_cookie(struct inode *inode) +{ + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + + if (!vcookie->fscache) + return; + P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, + vcookie->fscache); + + fscache_relinquish_cookie(vcookie->fscache, 0); + vcookie->fscache = NULL; +} + +void v9fs_cache_inode_flush_cookie(struct inode *inode) +{ + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + + if (!vcookie->fscache) + return; + P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, + vcookie->fscache); + + fscache_relinquish_cookie(vcookie->fscache, 1); + vcookie->fscache = NULL; +} + +void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) +{ + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + struct p9_fid *fid; + + if (!vcookie->fscache) + return; + + spin_lock(&vcookie->lock); + fid = filp->private_data; + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + v9fs_cache_inode_flush_cookie(inode); + else + v9fs_cache_inode_get_cookie(inode); + + spin_unlock(&vcookie->lock); +} + +void v9fs_cache_inode_reset_cookie(struct inode *inode) +{ + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + struct v9fs_session_info *v9ses; + struct fscache_cookie *old; + + if (!vcookie->fscache) + return; + + old = vcookie->fscache; + + spin_lock(&vcookie->lock); + fscache_relinquish_cookie(vcookie->fscache, 1); + + v9ses = v9fs_inode2v9ses(inode); + vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, + &v9fs_cache_inode_index_def, + vcookie); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", + inode, old, vcookie->fscache); + + spin_unlock(&vcookie->lock); +} + +int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) +{ + struct inode *inode = page->mapping->host; + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + + BUG_ON(!vcookie->fscache); + + if (PageFsCache(page)) { + if (fscache_check_page_write(vcookie->fscache, page)) { + if (!(gfp & __GFP_WAIT)) + return 0; + fscache_wait_on_page_write(vcookie->fscache, page); + } + + fscache_uncache_page(vcookie->fscache, page); + ClearPageFsCache(page); + } + + return 1; +} + +void __v9fs_fscache_invalidate_page(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + + BUG_ON(!vcookie->fscache); + + if (PageFsCache(page)) { + fscache_wait_on_page_write(vcookie->fscache, page); + BUG_ON(!PageLocked(page)); + fscache_uncache_page(vcookie->fscache, page); + ClearPageFsCache(page); + } +} + +static void v9fs_vfs_readpage_complete(struct page *page, void *data, + int error) +{ + if (!error) + SetPageUptodate(page); + + unlock_page(page); +} + +/** + * __v9fs_readpage_from_fscache - read a page from cache + * + * Returns 0 if the pages are in cache and a BIO is submitted, + * 1 if the pages are not in cache and -error otherwise. + */ + +int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) +{ + int ret; + const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); + if (!vcookie->fscache) + return -ENOBUFS; + + ret = fscache_read_or_alloc_page(vcookie->fscache, + page, + v9fs_vfs_readpage_complete, + NULL, + GFP_KERNEL); + switch (ret) { + case -ENOBUFS: + case -ENODATA: + P9_DPRINTK(P9_DEBUG_FSC, "page/inode not in cache %d", ret); + return 1; + case 0: + P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); + return ret; + default: + P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); + return ret; + } +} + +/** + * __v9fs_readpages_from_fscache - read multiple pages from cache + * + * Returns 0 if the pages are in cache and a BIO is submitted, + * 1 if the pages are not in cache and -error otherwise. + */ + +int __v9fs_readpages_from_fscache(struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + int ret; + const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); + if (!vcookie->fscache) + return -ENOBUFS; + + ret = fscache_read_or_alloc_pages(vcookie->fscache, + mapping, pages, nr_pages, + v9fs_vfs_readpage_complete, + NULL, + mapping_gfp_mask(mapping)); + switch (ret) { + case -ENOBUFS: + case -ENODATA: + P9_DPRINTK(P9_DEBUG_FSC, "pages/inodes not in cache %d", ret); + return 1; + case 0: + BUG_ON(!list_empty(pages)); + BUG_ON(*nr_pages != 0); + P9_DPRINTK(P9_DEBUG_FSC, "BIO submitted"); + return ret; + default: + P9_DPRINTK(P9_DEBUG_FSC, "ret %d", ret); + return ret; + } +} + +/** + * __v9fs_readpage_to_fscache - write a page to the cache + * + */ + +void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) +{ + int ret; + const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + + P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); + ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL); + P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); + if (ret != 0) + v9fs_uncache_page(inode, page); +} diff --git a/fs/9p/cache.h b/fs/9p/cache.h new file mode 100644 index 00000000000..a94192bfaee --- /dev/null +++ b/fs/9p/cache.h @@ -0,0 +1,176 @@ +/* + * V9FS cache definitions. + * + * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#ifndef _9P_CACHE_H +#ifdef CONFIG_9P_FSCACHE +#include <linux/fscache.h> +#include <linux/spinlock.h> + +extern struct kmem_cache *vcookie_cache; + +struct v9fs_cookie { + spinlock_t lock; + struct inode inode; + struct fscache_cookie *fscache; + struct p9_qid *qid; +}; + +static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode) +{ + return container_of(inode, struct v9fs_cookie, inode); +} + +extern struct fscache_netfs v9fs_cache_netfs; +extern const struct fscache_cookie_def v9fs_cache_session_index_def; +extern const struct fscache_cookie_def v9fs_cache_inode_index_def; + +extern void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses); +extern void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses); + +extern void v9fs_cache_inode_get_cookie(struct inode *inode); +extern void v9fs_cache_inode_put_cookie(struct inode *inode); +extern void v9fs_cache_inode_flush_cookie(struct inode *inode); +extern void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp); +extern void v9fs_cache_inode_reset_cookie(struct inode *inode); + +extern int __v9fs_cache_register(void); +extern void __v9fs_cache_unregister(void); + +extern int __v9fs_fscache_release_page(struct page *page, gfp_t gfp); +extern void __v9fs_fscache_invalidate_page(struct page *page); +extern int __v9fs_readpage_from_fscache(struct inode *inode, + struct page *page); +extern int __v9fs_readpages_from_fscache(struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages); +extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); + + +/** + * v9fs_cache_register - Register v9fs file system with the cache + */ +static inline int v9fs_cache_register(void) +{ + return __v9fs_cache_register(); +} + +/** + * v9fs_cache_unregister - Unregister v9fs from the cache + */ +static inline void v9fs_cache_unregister(void) +{ + __v9fs_cache_unregister(); +} + +static inline int v9fs_fscache_release_page(struct page *page, + gfp_t gfp) +{ + return __v9fs_fscache_release_page(page, gfp); +} + +static inline void v9fs_fscache_invalidate_page(struct page *page) +{ + __v9fs_fscache_invalidate_page(page); +} + +static inline int v9fs_readpage_from_fscache(struct inode *inode, + struct page *page) +{ + return __v9fs_readpage_from_fscache(inode, page); +} + +static inline int v9fs_readpages_from_fscache(struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + return __v9fs_readpages_from_fscache(inode, mapping, pages, + nr_pages); +} + +static inline void v9fs_readpage_to_fscache(struct inode *inode, + struct page *page) +{ + if (PageFsCache(page)) + __v9fs_readpage_to_fscache(inode, page); +} + +static inline void v9fs_uncache_page(struct inode *inode, struct page *page) +{ + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + fscache_uncache_page(vcookie->fscache, page); + BUG_ON(PageFsCache(page)); +} + +static inline void v9fs_vcookie_set_qid(struct inode *inode, + struct p9_qid *qid) +{ + struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); + spin_lock(&vcookie->lock); + vcookie->qid = qid; + spin_unlock(&vcookie->lock); +} + +#else /* CONFIG_9P_FSCACHE */ + +static inline int v9fs_cache_register(void) +{ + return 1; +} + +static inline void v9fs_cache_unregister(void) {} + +static inline int v9fs_fscache_release_page(struct page *page, + gfp_t gfp) { + return 1; +} + +static inline void v9fs_fscache_invalidate_page(struct page *page) {} + +static inline int v9fs_readpage_from_fscache(struct inode *inode, + struct page *page) +{ + return -ENOBUFS; +} + +static inline int v9fs_readpages_from_fscache(struct inode *inode, + struct address_space *mapping, + struct list_head *pages, + unsigned *nr_pages) +{ + return -ENOBUFS; +} + +static inline void v9fs_readpage_to_fscache(struct inode *inode, + struct page *page) +{} + +static inline void v9fs_uncache_page(struct inode *inode, struct page *page) +{} + +static inline void v9fs_vcookie_set_qid(struct inode *inode, + struct p9_qid *qid) +{} + +#endif /* CONFIG_9P_FSCACHE */ +#endif /* _9P_CACHE_H */ diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index f7003cfac63..cf62b05e296 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -34,21 +34,25 @@ #include <net/9p/transport.h> #include "v9fs.h" #include "v9fs_vfs.h" +#include "cache.h" + +static DEFINE_SPINLOCK(v9fs_sessionlist_lock); +static LIST_HEAD(v9fs_sessionlist); /* - * Option Parsing (code inspired by NFS code) - * NOTE: each transport will parse its own options - */ + * Option Parsing (code inspired by NFS code) + * NOTE: each transport will parse its own options + */ enum { /* Options that take integer arguments */ Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, /* String options */ - Opt_uname, Opt_remotename, Opt_trans, + Opt_uname, Opt_remotename, Opt_trans, Opt_cache, Opt_cachetag, /* Options that take no arguments */ Opt_nodevmap, /* Cache options */ - Opt_cache_loose, + Opt_cache_loose, Opt_fscache, /* Access options */ Opt_access, /* Error token */ @@ -63,8 +67,10 @@ static const match_table_t tokens = { {Opt_uname, "uname=%s"}, {Opt_remotename, "aname=%s"}, {Opt_nodevmap, "nodevmap"}, - {Opt_cache_loose, "cache=loose"}, + {Opt_cache, "cache=%s"}, {Opt_cache_loose, "loose"}, + {Opt_fscache, "fscache"}, + {Opt_cachetag, "cachetag=%s"}, {Opt_access, "access=%s"}, {Opt_err, NULL} }; @@ -89,16 +95,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) v9ses->afid = ~0; v9ses->debug = 0; v9ses->cache = 0; +#ifdef CONFIG_9P_FSCACHE + v9ses->cachetag = NULL; +#endif if (!opts) return 0; options = kstrdup(opts, GFP_KERNEL); - if (!options) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy of option string\n"); - return -ENOMEM; - } + if (!options) + goto fail_option_alloc; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -143,16 +149,33 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) case Opt_cache_loose: v9ses->cache = CACHE_LOOSE; break; + case Opt_fscache: + v9ses->cache = CACHE_FSCACHE; + break; + case Opt_cachetag: +#ifdef CONFIG_9P_FSCACHE + v9ses->cachetag = match_strdup(&args[0]); +#endif + break; + case Opt_cache: + s = match_strdup(&args[0]); + if (!s) + goto fail_option_alloc; + + if (strcmp(s, "loose") == 0) + v9ses->cache = CACHE_LOOSE; + else if (strcmp(s, "fscache") == 0) + v9ses->cache = CACHE_FSCACHE; + else + v9ses->cache = CACHE_NONE; + kfree(s); + break; case Opt_access: s = match_strdup(&args[0]); - if (!s) { - P9_DPRINTK(P9_DEBUG_ERROR, - "failed to allocate copy" - " of option argument\n"); - ret = -ENOMEM; - break; - } + if (!s) + goto fail_option_alloc; + v9ses->flags &= ~V9FS_ACCESS_MASK; if (strcmp(s, "user") == 0) v9ses->flags |= V9FS_ACCESS_USER; @@ -173,6 +196,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) } kfree(options); return ret; + +fail_option_alloc: + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option argument\n"); + return -ENOMEM; } /** @@ -200,6 +228,10 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return ERR_PTR(-ENOMEM); } + spin_lock(&v9fs_sessionlist_lock); + list_add(&v9ses->slist, &v9fs_sessionlist); + spin_unlock(&v9fs_sessionlist_lock); + v9ses->flags = V9FS_EXTENDED | V9FS_ACCESS_USER; strcpy(v9ses->uname, V9FS_DEFUSER); strcpy(v9ses->aname, V9FS_DEFANAME); @@ -249,6 +281,11 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, else fid->uid = ~0; +#ifdef CONFIG_9P_FSCACHE + /* register the session for caching */ + v9fs_cache_session_get_cookie(v9ses); +#endif + return fid; error: @@ -268,8 +305,18 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) v9ses->clnt = NULL; } +#ifdef CONFIG_9P_FSCACHE + if (v9ses->fscache) { + v9fs_cache_session_put_cookie(v9ses); + kfree(v9ses->cachetag); + } +#endif __putname(v9ses->uname); __putname(v9ses->aname); + + spin_lock(&v9fs_sessionlist_lock); + list_del(&v9ses->slist); + spin_unlock(&v9fs_sessionlist_lock); } /** @@ -286,25 +333,132 @@ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { extern int v9fs_error_init(void); +static struct kobject *v9fs_kobj; + +#ifdef CONFIG_9P_FSCACHE /** - * v9fs_init - Initialize module + * caches_show - list caches associated with a session + * + * Returns the size of buffer written. + */ + +static ssize_t caches_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + ssize_t n = 0, count = 0, limit = PAGE_SIZE; + struct v9fs_session_info *v9ses; + + spin_lock(&v9fs_sessionlist_lock); + list_for_each_entry(v9ses, &v9fs_sessionlist, slist) { + if (v9ses->cachetag) { + n = snprintf(buf, limit, "%s\n", v9ses->cachetag); + if (n < 0) { + count = n; + break; + } + + count += n; + limit -= n; + } + } + + spin_unlock(&v9fs_sessionlist_lock); + return count; +} + +static struct kobj_attribute v9fs_attr_cache = __ATTR_RO(caches); +#endif /* CONFIG_9P_FSCACHE */ + +static struct attribute *v9fs_attrs[] = { +#ifdef CONFIG_9P_FSCACHE + &v9fs_attr_cache.attr, +#endif + NULL, +}; + +static struct attribute_group v9fs_attr_group = { + .attrs = v9fs_attrs, +}; + +/** + * v9fs_sysfs_init - Initialize the v9fs sysfs interface + * + */ + +static int v9fs_sysfs_init(void) +{ + v9fs_kobj = kobject_create_and_add("9p", fs_kobj); + if (!v9fs_kobj) + return -ENOMEM; + + if (sysfs_create_group(v9fs_kobj, &v9fs_attr_group)) { + kobject_put(v9fs_kobj); + return -ENOMEM; + } + + return 0; +} + +/** + * v9fs_sysfs_cleanup - Unregister the v9fs sysfs interface + * + */ + +static void v9fs_sysfs_cleanup(void) +{ + sysfs_remove_group(v9fs_kobj, &v9fs_attr_group); + kobject_put(v9fs_kobj); +} + +/** + * init_v9fs - Initialize module * */ static int __init init_v9fs(void) { + int err; printk(KERN_INFO "Installing v9fs 9p2000 file system support\n"); /* TODO: Setup list of registered trasnport modules */ - return register_filesystem(&v9fs_fs_type); + err = register_filesystem(&v9fs_fs_type); + if (err < 0) { + printk(KERN_ERR "Failed to register filesystem\n"); + return err; + } + + err = v9fs_cache_register(); + if (err < 0) { + printk(KERN_ERR "Failed to register v9fs for caching\n"); + goto out_fs_unreg; + } + + err = v9fs_sysfs_init(); + if (err < 0) { + printk(KERN_ERR "Failed to register with sysfs\n"); + goto out_sysfs_cleanup; + } + + return 0; + +out_sysfs_cleanup: + v9fs_sysfs_cleanup(); + +out_fs_unreg: + unregister_filesystem(&v9fs_fs_type); + + return err; } /** - * v9fs_init - shutdown module + * exit_v9fs - shutdown module * */ static void __exit exit_v9fs(void) { + v9fs_sysfs_cleanup(); + v9fs_cache_unregister(); unregister_filesystem(&v9fs_fs_type); } diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 38762bf102a..019f4ccb70c 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -51,6 +51,7 @@ enum p9_session_flags { enum p9_cache_modes { CACHE_NONE, CACHE_LOOSE, + CACHE_FSCACHE, }; /** @@ -60,6 +61,8 @@ enum p9_cache_modes { * @debug: debug level * @afid: authentication handle * @cache: cache mode of type &p9_cache_modes + * @cachetag: the tag of the cache associated with this session + * @fscache: session cookie associated with FS-Cache * @options: copy of options string given by user * @uname: string user name to mount hierarchy as * @aname: mount specifier for remote hierarchy @@ -68,7 +71,7 @@ enum p9_cache_modes { * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @clnt: reference to 9P network client instantiated for this session - * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug + * @slist: reference to list of registered 9p sessions * * This structure holds state for each session instance established during * a sys_mount() . @@ -84,6 +87,10 @@ struct v9fs_session_info { unsigned short debug; unsigned int afid; unsigned int cache; +#ifdef CONFIG_9P_FSCACHE + char *cachetag; + struct fscache_cookie *fscache; +#endif char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ @@ -92,11 +99,9 @@ struct v9fs_session_info { unsigned int dfltgid; /* default gid for legacy support */ u32 uid; /* if ACCESS_SINGLE, the uid that has access */ struct p9_client *clnt; /* 9p client */ - struct dentry *debugfs_dir; + struct list_head slist; /* list of sessions registered with v9fs */ }; -extern struct dentry *v9fs_debugfs_root; - struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, char *); void v9fs_session_close(struct v9fs_session_info *v9ses); diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index f0c7de78e20..3a7560e3586 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -44,7 +44,13 @@ extern const struct file_operations v9fs_dir_operations; extern const struct dentry_operations v9fs_dentry_operations; extern const struct dentry_operations v9fs_cached_dentry_operations; +#ifdef CONFIG_9P_FSCACHE +struct inode *v9fs_alloc_inode(struct super_block *sb); +void v9fs_destroy_inode(struct inode *inode); +#endif + struct inode *v9fs_get_inode(struct super_block *sb, int mode); +void v9fs_clear_inode(struct inode *inode); ino_t v9fs_qid2ino(struct p9_qid *qid); void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); int v9fs_dir_release(struct inode *inode, struct file *filp); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 92828281a30..90e38449f4b 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -38,6 +38,7 @@ #include "v9fs.h" #include "v9fs_vfs.h" +#include "cache.h" /** * v9fs_vfs_readpage - read an entire page in from 9P @@ -52,18 +53,31 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page) int retval; loff_t offset; char *buffer; + struct inode *inode; + inode = page->mapping->host; P9_DPRINTK(P9_DEBUG_VFS, "\n"); + + BUG_ON(!PageLocked(page)); + + retval = v9fs_readpage_from_fscache(inode, page); + if (retval == 0) + return retval; + buffer = kmap(page); offset = page_offset(page); retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); - if (retval < 0) + if (retval < 0) { + v9fs_uncache_page(inode, page); goto done; + } memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval); flush_dcache_page(page); SetPageUptodate(page); + + v9fs_readpage_to_fscache(inode, page); retval = 0; done: @@ -72,6 +86,78 @@ done: return retval; } +/** + * v9fs_vfs_readpages - read a set of pages from 9P + * + * @filp: file being read + * @mapping: the address space + * @pages: list of pages to read + * @nr_pages: count of pages to read + * + */ + +static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + int ret = 0; + struct inode *inode; + + inode = mapping->host; + P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp); + + ret = v9fs_readpages_from_fscache(inode, mapping, pages, &nr_pages); + if (ret == 0) + return ret; + + ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp); + P9_DPRINTK(P9_DEBUG_VFS, " = %d\n", ret); + return ret; +} + +/** + * v9fs_release_page - release the private state associated with a page + * + * Returns 1 if the page can be released, false otherwise. + */ + +static int v9fs_release_page(struct page *page, gfp_t gfp) +{ + if (PagePrivate(page)) + return 0; + + return v9fs_fscache_release_page(page, gfp); +} + +/** + * v9fs_invalidate_page - Invalidate a page completely or partially + * + * @page: structure to page + * @offset: offset in the page + */ + +static void v9fs_invalidate_page(struct page *page, unsigned long offset) +{ + if (offset == 0) + v9fs_fscache_invalidate_page(page); +} + +/** + * v9fs_launder_page - Writeback a dirty page + * Since the writes go directly to the server, we simply return a 0 + * here to indicate success. + * + * Returns 0 on success. + */ + +static int v9fs_launder_page(struct page *page) +{ + return 0; +} + const struct address_space_operations v9fs_addr_operations = { .readpage = v9fs_vfs_readpage, + .readpages = v9fs_vfs_readpages, + .releasepage = v9fs_release_page, + .invalidatepage = v9fs_invalidate_page, + .launder_page = v9fs_launder_page, }; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 68bf2af6c38..3902bf43a08 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/inet.h> #include <linux/list.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> #include <linux/idr.h> #include <net/9p/9p.h> @@ -40,6 +41,7 @@ #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" +#include "cache.h" static const struct file_operations v9fs_cached_file_operations; @@ -72,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) return err; } if (omode & P9_OTRUNC) { - inode->i_size = 0; + i_size_write(inode, 0); inode->i_blocks = 0; } if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses))) @@ -85,6 +87,10 @@ int v9fs_file_open(struct inode *inode, struct file *file) /* enable cached file options */ if(file->f_op == &v9fs_file_operations) file->f_op = &v9fs_cached_file_operations; + +#ifdef CONFIG_9P_FSCACHE + v9fs_cache_inode_set_cookie(inode, file); +#endif } return 0; @@ -210,6 +216,7 @@ v9fs_file_write(struct file *filp, const char __user * data, struct p9_client *clnt; struct inode *inode = filp->f_path.dentry->d_inode; int origin = *offset; + unsigned long pg_start, pg_end; P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count, (int)*offset); @@ -225,7 +232,7 @@ v9fs_file_write(struct file *filp, const char __user * data, if (count < rsize) rsize = count; - n = p9_client_write(fid, NULL, data+total, *offset+total, + n = p9_client_write(fid, NULL, data+total, origin+total, rsize); if (n <= 0) break; @@ -234,14 +241,14 @@ v9fs_file_write(struct file *filp, const char __user * data, } while (count > 0); if (total > 0) { - invalidate_inode_pages2_range(inode->i_mapping, origin, - origin+total); + pg_start = origin >> PAGE_CACHE_SHIFT; + pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; + if (inode->i_mapping && inode->i_mapping->nrpages) + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); *offset += total; - } - - if (*offset > inode->i_size) { - inode->i_size = *offset; - inode->i_blocks = (inode->i_size + 512 - 1) >> 9; + i_size_write(inode, i_size_read(inode) + total); + inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; } if (n < 0) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 06a223d50a8..5947628aefe 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -40,6 +40,7 @@ #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" +#include "cache.h" static const struct inode_operations v9fs_dir_inode_operations; static const struct inode_operations v9fs_dir_inode_operations_ext; @@ -197,6 +198,39 @@ v9fs_blank_wstat(struct p9_wstat *wstat) wstat->extension = NULL; } +#ifdef CONFIG_9P_FSCACHE +/** + * v9fs_alloc_inode - helper function to allocate an inode + * This callback is executed before setting up the inode so that we + * can associate a vcookie with each inode. + * + */ + +struct inode *v9fs_alloc_inode(struct super_block *sb) +{ + struct v9fs_cookie *vcookie; + vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache, + GFP_KERNEL); + if (!vcookie) + return NULL; + + vcookie->fscache = NULL; + vcookie->qid = NULL; + spin_lock_init(&vcookie->lock); + return &vcookie->inode; +} + +/** + * v9fs_destroy_inode - destroy an inode + * + */ + +void v9fs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); +} +#endif + /** * v9fs_get_inode - helper function to setup an inode * @sb: superblock @@ -326,6 +360,21 @@ error: } */ + +/** + * v9fs_clear_inode - release an inode + * @inode: inode to release + * + */ +void v9fs_clear_inode(struct inode *inode) +{ + filemap_fdatawrite(inode->i_mapping); + +#ifdef CONFIG_9P_FSCACHE + v9fs_cache_inode_put_cookie(inode); +#endif +} + /** * v9fs_inode_from_fid - populate an inode by issuing a attribute request * @v9ses: session information @@ -356,8 +405,14 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, v9fs_stat2inode(st, ret, sb); ret->i_ino = v9fs_qid2ino(&st->qid); + +#ifdef CONFIG_9P_FSCACHE + v9fs_vcookie_set_qid(ret, &st->qid); + v9fs_cache_inode_get_cookie(ret); +#endif p9stat_free(st); kfree(st); + return ret; error: @@ -751,7 +806,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); err = -EPERM; v9ses = v9fs_inode2v9ses(dentry->d_inode); - if (v9ses->cache == CACHE_LOOSE) + if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) return simple_getattr(mnt, dentry, stat); fid = v9fs_fid_lookup(dentry); @@ -872,10 +927,10 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, } else inode->i_rdev = 0; - inode->i_size = stat->length; + i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... */ - inode->i_blocks = (inode->i_size + 512 - 1) >> 9; + inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; } /** diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8961f1a8f66..14a86448572 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -44,21 +44,9 @@ #include "v9fs_vfs.h" #include "fid.h" -static void v9fs_clear_inode(struct inode *); static const struct super_operations v9fs_super_ops; /** - * v9fs_clear_inode - release an inode - * @inode: inode to release - * - */ - -static void v9fs_clear_inode(struct inode *inode) -{ - filemap_fdatawrite(inode->i_mapping); -} - -/** * v9fs_set_super - set the superblock * @s: super block * @data: file system specific data @@ -220,6 +208,10 @@ v9fs_umount_begin(struct super_block *sb) } static const struct super_operations v9fs_super_ops = { +#ifdef CONFIG_9P_FSCACHE + .alloc_inode = v9fs_alloc_inode, + .destroy_inode = v9fs_destroy_inode, +#endif .statfs = simple_statfs, .clear_inode = v9fs_clear_inode, .show_options = generic_show_options, diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 798cb071d13..3f57ce4bee5 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -19,9 +19,6 @@ static int adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, int create) { - if (block < 0) - goto abort_negative; - if (!create) { if (block >= inode->i_blocks) goto abort_toobig; @@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, /* don't support allocation of blocks yet */ return -EIO; -abort_negative: - adfs_error(inode->i_sb, "block %d < 0", block); - return -EIO; - abort_toobig: return 0; } diff --git a/fs/attr.c b/fs/attr.c index 9fe1b1bd30a..96d394bdadd 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -18,7 +18,7 @@ /* Taken over from the old code... */ /* POSIX UID/GID verification for setting inode attributes. */ -int inode_change_ok(struct inode *inode, struct iattr *attr) +int inode_change_ok(const struct inode *inode, struct iattr *attr) { int retval = -EPERM; unsigned int ia_valid = attr->ia_valid; @@ -60,9 +60,51 @@ fine: error: return retval; } - EXPORT_SYMBOL(inode_change_ok); +/** + * inode_newsize_ok - may this inode be truncated to a given size + * @inode: the inode to be truncated + * @offset: the new size to assign to the inode + * @Returns: 0 on success, -ve errno on failure + * + * inode_newsize_ok will check filesystem limits and ulimits to check that the + * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ + * when necessary. Caller must not proceed with inode size change if failure is + * returned. @inode must be a file (not directory), with appropriate + * permissions to allow truncate (inode_newsize_ok does NOT check these + * conditions). + * + * inode_newsize_ok must be called with i_mutex held. + */ +int inode_newsize_ok(const struct inode *inode, loff_t offset) +{ + if (inode->i_size < offset) { + unsigned long limit; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && offset > limit) + goto out_sig; + if (offset > inode->i_sb->s_maxbytes) + goto out_big; + } else { + /* + * truncation of in-use swapfiles is disallowed - it would + * cause subsequent swapout to scribble on the now-freed + * blocks. + */ + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + } + + return 0; +out_sig: + send_sig(SIGXFSZ, current, 0); +out_big: + return -EFBIG; +} +EXPORT_SYMBOL(inode_newsize_ok); + int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index dd376c124e7..33baf27fac7 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb) { kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; - - if (BEFS_SB(sb)->nls) { - unload_nls(BEFS_SB(sb)->nls); - BEFS_SB(sb)->nls = NULL; - } - + unload_nls(BEFS_SB(sb)->nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 442d94fe255..b9b3bb51b1e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1711,42 +1711,52 @@ struct elf_note_info { int numnote; }; -static int fill_note_info(struct elfhdr *elf, int phdrs, - struct elf_note_info *info, - long signr, struct pt_regs *regs) +static int elf_note_info_init(struct elf_note_info *info) { -#define NUM_NOTES 6 - struct list_head *t; - - info->notes = NULL; - info->prstatus = NULL; - info->psinfo = NULL; - info->fpu = NULL; -#ifdef ELF_CORE_COPY_XFPREGS - info->xfpu = NULL; -#endif + memset(info, 0, sizeof(*info)); INIT_LIST_HEAD(&info->thread_list); - info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), - GFP_KERNEL); + /* Allocate space for six ELF notes */ + info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); if (!info->notes) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); if (!info->psinfo) - return 0; + goto notes_free; info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); if (!info->prstatus) - return 0; + goto psinfo_free; info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); if (!info->fpu) - return 0; + goto prstatus_free; #ifdef ELF_CORE_COPY_XFPREGS info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); if (!info->xfpu) - return 0; + goto fpu_free; +#endif + return 1; +#ifdef ELF_CORE_COPY_XFPREGS + fpu_free: + kfree(info->fpu); #endif + prstatus_free: + kfree(info->prstatus); + psinfo_free: + kfree(info->psinfo); + notes_free: + kfree(info->notes); + return 0; +} + +static int fill_note_info(struct elfhdr *elf, int phdrs, + struct elf_note_info *info, + long signr, struct pt_regs *regs) +{ + struct list_head *t; + + if (!elf_note_info_init(info)) + return 0; - info->thread_status_size = 0; if (signr) { struct core_thread *ct; struct elf_thread_status *ets; @@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, #endif return 1; - -#undef NUM_NOTES } static size_t get_note_info_size(struct elf_note_info *info) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 76285471073..38502c67987 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, } stack_size = exec_params.stack_size; - if (stack_size < interp_params.stack_size) - stack_size = interp_params.stack_size; - if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) executable_stack = EXSTACK_ENABLE_X; else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) executable_stack = EXSTACK_DISABLE_X; - else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) - executable_stack = EXSTACK_ENABLE_X; - else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) - executable_stack = EXSTACK_DISABLE_X; else executable_stack = EXSTACK_DEFAULT; + if (stack_size == 0) { + stack_size = interp_params.stack_size; + if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) + executable_stack = EXSTACK_ENABLE_X; + else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) + executable_stack = EXSTACK_DISABLE_X; + else + executable_stack = EXSTACK_DEFAULT; + } + retval = -ENOEXEC; if (stack_size == 0) goto error; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index e92f229e3c6..a2796651e75 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -278,8 +278,6 @@ static int decompress_exec( ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); if (ret <= 0) break; - if (ret >= (unsigned long) -4096) - break; len -= ret; strm.next_in = buf; @@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) "(%d != %d)", (unsigned) r, curid, id); goto failed; } else if ( ! p->lib_list[id].loaded && - load_flat_shared_library(id, p) > (unsigned long) -4096) { + IS_ERR_VALUE(load_flat_shared_library(id, p))) { printk("BINFMT_FLAT: failed to load library %d", id); goto failed; } @@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm, textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_EXECUTABLE, 0); up_write(¤t->mm->mmap_sem); - if (!textpos || textpos >= (unsigned long) -4096) { + if (!textpos || IS_ERR_VALUE(textpos)) { if (!textpos) textpos = (unsigned long) -ENOMEM; printk("Unable to mmap process text, errno %d\n", (int)-textpos); @@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); - if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { + if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) { if (!realdatastart) realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", @@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm, result = bprm->file->f_op->read(bprm->file, (char *) datapos, data_len + (relocs * sizeof(unsigned long)), &fpos); } - if (result >= (unsigned long)-4096) { + if (IS_ERR_VALUE(result)) { printk("Unable to read data+bss, errno %d\n", (int)-result); do_munmap(current->mm, textpos, text_len); do_munmap(current->mm, realdatastart, data_len + extra); @@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); - if (!textpos || textpos >= (unsigned long) -4096) { + if (!textpos || IS_ERR_VALUE(textpos)) { if (!textpos) textpos = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process text/data, errno %d\n", @@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm, fpos = 0; result = bprm->file->f_op->read(bprm->file, (char *) textpos, text_len, &fpos); - if (result < (unsigned long) -4096) + if (!IS_ERR_VALUE(result)) result = decompress_exec(bprm, text_len, (char *) datapos, data_len + (relocs * sizeof(unsigned long)), 0); } @@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm, fpos = 0; result = bprm->file->f_op->read(bprm->file, (char *) textpos, text_len, &fpos); - if (result < (unsigned long) -4096) { + if (!IS_ERR_VALUE(result)) { fpos = ntohl(hdr->data_start); result = bprm->file->f_op->read(bprm->file, (char *) datapos, data_len + (relocs * sizeof(unsigned long)), &fpos); } } - if (result >= (unsigned long)-4096) { + if (IS_ERR_VALUE(result)) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); do_munmap(current->mm, textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); @@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) res = prepare_binprm(&bprm); - if (res <= (unsigned long)-4096) + if (!IS_ERR_VALUE(res)) res = load_flat_file(&bprm, libs, id, NULL); abort_creds(bprm.cred); @@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ res = load_flat_file(bprm, &libinfo, 0, &stack_len); - if (res > (unsigned long)-4096) + if (IS_ERR_VALUE(res)) return res; /* Update data segment pointers for all libraries */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 5d1ed50bd46..9cf4b926f8e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev); * freeze_bdev -- lock a filesystem and force it into a consistent state * @bdev: blockdevice to lock * - * This takes the block device bd_mount_sem to make sure no new mounts - * happen on bdev until thaw_bdev() is called. * If a superblock is found on this device, we take the s_umount semaphore * on it to make sure nobody unmounts until the snapshot creation is done. * The reference counter (bd_fsfreeze_count) guarantees that only the last @@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev) int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (bdev->bd_fsfreeze_count > 0) { - bdev->bd_fsfreeze_count++; + if (++bdev->bd_fsfreeze_count > 1) { + /* + * We don't even need to grab a reference - the first call + * to freeze_bdev grab an active reference and only the last + * thaw_bdev drops it. + */ sb = get_super(bdev); + drop_super(sb); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } - bdev->bd_fsfreeze_count++; - - down(&bdev->bd_mount_sem); - sb = get_super(bdev); - if (sb && !(sb->s_flags & MS_RDONLY)) { - sb->s_frozen = SB_FREEZE_WRITE; - smp_wmb(); - - sync_filesystem(sb); - - sb->s_frozen = SB_FREEZE_TRANS; - smp_wmb(); - - sync_blockdev(sb->s_bdev); - - if (sb->s_op->freeze_fs) { - error = sb->s_op->freeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem freeze failed\n"); - sb->s_frozen = SB_UNFROZEN; - drop_super(sb); - up(&bdev->bd_mount_sem); - bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); - } + + sb = get_active_super(bdev); + if (!sb) + goto out; + if (sb->s_flags & MS_RDONLY) { + deactivate_locked_super(sb); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return sb; + } + + sb->s_frozen = SB_FREEZE_WRITE; + smp_wmb(); + + sync_filesystem(sb); + + sb->s_frozen = SB_FREEZE_TRANS; + smp_wmb(); + + sync_blockdev(sb->s_bdev); + + if (sb->s_op->freeze_fs) { + error = sb->s_op->freeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem freeze failed\n"); + sb->s_frozen = SB_UNFROZEN; + deactivate_locked_super(sb); + bdev->bd_fsfreeze_count--; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return ERR_PTR(error); } } + up_write(&sb->s_umount); + out: sync_blockdev(bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); - - return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ + return sb; /* thaw_bdev releases s->s_umount */ } EXPORT_SYMBOL(freeze_bdev); @@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev); */ int thaw_bdev(struct block_device *bdev, struct super_block *sb) { - int error = 0; + int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (!bdev->bd_fsfreeze_count) { - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return -EINVAL; - } - - bdev->bd_fsfreeze_count--; - if (bdev->bd_fsfreeze_count > 0) { - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return 0; - } - - if (sb) { - BUG_ON(sb->s_bdev != bdev); - if (!(sb->s_flags & MS_RDONLY)) { - if (sb->s_op->unfreeze_fs) { - error = sb->s_op->unfreeze_fs(sb); - if (error) { - printk(KERN_ERR - "VFS:Filesystem thaw failed\n"); - sb->s_frozen = SB_FREEZE_TRANS; - bdev->bd_fsfreeze_count++; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return error; - } - } - sb->s_frozen = SB_UNFROZEN; - smp_wmb(); - wake_up(&sb->s_wait_unfrozen); + if (!bdev->bd_fsfreeze_count) + goto out_unlock; + + error = 0; + if (--bdev->bd_fsfreeze_count > 0) + goto out_unlock; + + if (!sb) + goto out_unlock; + + BUG_ON(sb->s_bdev != bdev); + down_write(&sb->s_umount); + if (sb->s_flags & MS_RDONLY) + goto out_deactivate; + + if (sb->s_op->unfreeze_fs) { + error = sb->s_op->unfreeze_fs(sb); + if (error) { + printk(KERN_ERR + "VFS:Filesystem thaw failed\n"); + sb->s_frozen = SB_FREEZE_TRANS; + bdev->bd_fsfreeze_count++; + mutex_unlock(&bdev->bd_fsfreeze_mutex); + return error; } - drop_super(sb); } - up(&bdev->bd_mount_sem); + sb->s_frozen = SB_UNFROZEN; + smp_wmb(); + wake_up(&sb->s_wait_unfrozen); + +out_deactivate: + if (sb) + deactivate_locked_super(sb); +out_unlock: mutex_unlock(&bdev->bd_fsfreeze_mutex); return 0; } @@ -430,7 +437,6 @@ static void init_once(void *foo) memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); - sema_init(&bdev->bd_mount_sem, 1); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 019e8af449a..282ca085c2f 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -48,6 +48,9 @@ struct btrfs_worker_thread { /* number of things on the pending list */ atomic_t num_pending; + /* reference counter for this struct */ + atomic_t refs; + unsigned long sequence; /* protects the pending list. */ @@ -71,7 +74,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) unsigned long flags; spin_lock_irqsave(&worker->workers->lock, flags); worker->idle = 1; - list_move(&worker->worker_list, &worker->workers->idle_list); + + /* the list may be empty if the worker is just starting */ + if (!list_empty(&worker->worker_list)) { + list_move(&worker->worker_list, + &worker->workers->idle_list); + } spin_unlock_irqrestore(&worker->workers->lock, flags); } } @@ -87,23 +95,49 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) unsigned long flags; spin_lock_irqsave(&worker->workers->lock, flags); worker->idle = 0; - list_move_tail(&worker->worker_list, - &worker->workers->worker_list); + + if (!list_empty(&worker->worker_list)) { + list_move_tail(&worker->worker_list, + &worker->workers->worker_list); + } spin_unlock_irqrestore(&worker->workers->lock, flags); } } -static noinline int run_ordered_completions(struct btrfs_workers *workers, - struct btrfs_work *work) +static void check_pending_worker_creates(struct btrfs_worker_thread *worker) { + struct btrfs_workers *workers = worker->workers; unsigned long flags; + rmb(); + if (!workers->atomic_start_pending) + return; + + spin_lock_irqsave(&workers->lock, flags); + if (!workers->atomic_start_pending) + goto out; + + workers->atomic_start_pending = 0; + if (workers->num_workers >= workers->max_workers) + goto out; + + spin_unlock_irqrestore(&workers->lock, flags); + btrfs_start_workers(workers, 1); + return; + +out: + spin_unlock_irqrestore(&workers->lock, flags); +} + +static noinline int run_ordered_completions(struct btrfs_workers *workers, + struct btrfs_work *work) +{ if (!workers->ordered) return 0; set_bit(WORK_DONE_BIT, &work->flags); - spin_lock_irqsave(&workers->lock, flags); + spin_lock(&workers->order_lock); while (1) { if (!list_empty(&workers->prio_order_list)) { @@ -126,45 +160,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) break; - spin_unlock_irqrestore(&workers->lock, flags); + spin_unlock(&workers->order_lock); work->ordered_func(work); /* now take the lock again and call the freeing code */ - spin_lock_irqsave(&workers->lock, flags); + spin_lock(&workers->order_lock); list_del(&work->order_list); work->ordered_free(work); } - spin_unlock_irqrestore(&workers->lock, flags); + spin_unlock(&workers->order_lock); return 0; } +static void put_worker(struct btrfs_worker_thread *worker) +{ + if (atomic_dec_and_test(&worker->refs)) + kfree(worker); +} + +static int try_worker_shutdown(struct btrfs_worker_thread *worker) +{ + int freeit = 0; + + spin_lock_irq(&worker->lock); + spin_lock(&worker->workers->lock); + if (worker->workers->num_workers > 1 && + worker->idle && + !worker->working && + !list_empty(&worker->worker_list) && + list_empty(&worker->prio_pending) && + list_empty(&worker->pending) && + atomic_read(&worker->num_pending) == 0) { + freeit = 1; + list_del_init(&worker->worker_list); + worker->workers->num_workers--; + } + spin_unlock(&worker->workers->lock); + spin_unlock_irq(&worker->lock); + + if (freeit) + put_worker(worker); + return freeit; +} + +static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, + struct list_head *prio_head, + struct list_head *head) +{ + struct btrfs_work *work = NULL; + struct list_head *cur = NULL; + + if(!list_empty(prio_head)) + cur = prio_head->next; + + smp_mb(); + if (!list_empty(&worker->prio_pending)) + goto refill; + + if (!list_empty(head)) + cur = head->next; + + if (cur) + goto out; + +refill: + spin_lock_irq(&worker->lock); + list_splice_tail_init(&worker->prio_pending, prio_head); + list_splice_tail_init(&worker->pending, head); + + if (!list_empty(prio_head)) + cur = prio_head->next; + else if (!list_empty(head)) + cur = head->next; + spin_unlock_irq(&worker->lock); + + if (!cur) + goto out_fail; + +out: + work = list_entry(cur, struct btrfs_work, list); + +out_fail: + return work; +} + /* * main loop for servicing work items */ static int worker_loop(void *arg) { struct btrfs_worker_thread *worker = arg; - struct list_head *cur; + struct list_head head; + struct list_head prio_head; struct btrfs_work *work; + + INIT_LIST_HEAD(&head); + INIT_LIST_HEAD(&prio_head); + do { - spin_lock_irq(&worker->lock); -again_locked: +again: while (1) { - if (!list_empty(&worker->prio_pending)) - cur = worker->prio_pending.next; - else if (!list_empty(&worker->pending)) - cur = worker->pending.next; - else + + + work = get_next_work(worker, &prio_head, &head); + if (!work) break; - work = list_entry(cur, struct btrfs_work, list); list_del(&work->list); clear_bit(WORK_QUEUED_BIT, &work->flags); work->worker = worker; - spin_unlock_irq(&worker->lock); work->func(work); @@ -175,9 +282,13 @@ again_locked: */ run_ordered_completions(worker->workers, work); - spin_lock_irq(&worker->lock); - check_idle_worker(worker); + check_pending_worker_creates(worker); + } + + spin_lock_irq(&worker->lock); + check_idle_worker(worker); + if (freezing(current)) { worker->working = 0; spin_unlock_irq(&worker->lock); @@ -216,8 +327,10 @@ again_locked: spin_lock_irq(&worker->lock); set_current_state(TASK_INTERRUPTIBLE); if (!list_empty(&worker->pending) || - !list_empty(&worker->prio_pending)) - goto again_locked; + !list_empty(&worker->prio_pending)) { + spin_unlock_irq(&worker->lock); + goto again; + } /* * this makes sure we get a wakeup when someone @@ -226,8 +339,13 @@ again_locked: worker->working = 0; spin_unlock_irq(&worker->lock); - if (!kthread_should_stop()) - schedule(); + if (!kthread_should_stop()) { + schedule_timeout(HZ * 120); + if (!worker->working && + try_worker_shutdown(worker)) { + return 0; + } + } } __set_current_state(TASK_RUNNING); } @@ -242,16 +360,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers) { struct list_head *cur; struct btrfs_worker_thread *worker; + int can_stop; + spin_lock_irq(&workers->lock); list_splice_init(&workers->idle_list, &workers->worker_list); while (!list_empty(&workers->worker_list)) { cur = workers->worker_list.next; worker = list_entry(cur, struct btrfs_worker_thread, worker_list); - kthread_stop(worker->task); - list_del(&worker->worker_list); - kfree(worker); + + atomic_inc(&worker->refs); + workers->num_workers -= 1; + if (!list_empty(&worker->worker_list)) { + list_del_init(&worker->worker_list); + put_worker(worker); + can_stop = 1; + } else + can_stop = 0; + spin_unlock_irq(&workers->lock); + if (can_stop) + kthread_stop(worker->task); + spin_lock_irq(&workers->lock); + put_worker(worker); } + spin_unlock_irq(&workers->lock); return 0; } @@ -266,10 +398,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) INIT_LIST_HEAD(&workers->order_list); INIT_LIST_HEAD(&workers->prio_order_list); spin_lock_init(&workers->lock); + spin_lock_init(&workers->order_lock); workers->max_workers = max; workers->idle_thresh = 32; workers->name = name; workers->ordered = 0; + workers->atomic_start_pending = 0; + workers->atomic_worker_start = 0; } /* @@ -293,7 +428,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) INIT_LIST_HEAD(&worker->prio_pending); INIT_LIST_HEAD(&worker->worker_list); spin_lock_init(&worker->lock); + atomic_set(&worker->num_pending, 0); + atomic_set(&worker->refs, 1); worker->workers = workers; worker->task = kthread_run(worker_loop, worker, "btrfs-%s-%d", workers->name, @@ -303,7 +440,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) kfree(worker); goto fail; } - spin_lock_irq(&workers->lock); list_add_tail(&worker->worker_list, &workers->idle_list); worker->idle = 1; @@ -350,7 +486,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) */ next = workers->worker_list.next; worker = list_entry(next, struct btrfs_worker_thread, worker_list); - atomic_inc(&worker->num_pending); worker->sequence++; if (worker->sequence % workers->idle_thresh == 0) @@ -367,28 +502,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) { struct btrfs_worker_thread *worker; unsigned long flags; + struct list_head *fallback; again: spin_lock_irqsave(&workers->lock, flags); worker = next_worker(workers); - spin_unlock_irqrestore(&workers->lock, flags); if (!worker) { - spin_lock_irqsave(&workers->lock, flags); if (workers->num_workers >= workers->max_workers) { - struct list_head *fallback = NULL; - /* - * we have failed to find any workers, just - * return the force one - */ - if (!list_empty(&workers->worker_list)) - fallback = workers->worker_list.next; - if (!list_empty(&workers->idle_list)) - fallback = workers->idle_list.next; - BUG_ON(!fallback); - worker = list_entry(fallback, - struct btrfs_worker_thread, worker_list); - spin_unlock_irqrestore(&workers->lock, flags); + goto fallback; + } else if (workers->atomic_worker_start) { + workers->atomic_start_pending = 1; + goto fallback; } else { spin_unlock_irqrestore(&workers->lock, flags); /* we're below the limit, start another worker */ @@ -396,6 +521,28 @@ again: goto again; } } + goto found; + +fallback: + fallback = NULL; + /* + * we have failed to find any workers, just + * return the first one we can find. + */ + if (!list_empty(&workers->worker_list)) + fallback = workers->worker_list.next; + if (!list_empty(&workers->idle_list)) + fallback = workers->idle_list.next; + BUG_ON(!fallback); + worker = list_entry(fallback, + struct btrfs_worker_thread, worker_list); +found: + /* + * this makes sure the worker doesn't exit before it is placed + * onto a busy/idle list + */ + atomic_inc(&worker->num_pending); + spin_unlock_irqrestore(&workers->lock, flags); return worker; } @@ -427,7 +574,7 @@ int btrfs_requeue_work(struct btrfs_work *work) spin_lock(&worker->workers->lock); worker->idle = 0; list_move_tail(&worker->worker_list, - &worker->workers->worker_list); + &worker->workers->worker_list); spin_unlock(&worker->workers->lock); } if (!worker->working) { @@ -435,9 +582,9 @@ int btrfs_requeue_work(struct btrfs_work *work) worker->working = 1; } - spin_unlock_irqrestore(&worker->lock, flags); if (wake) wake_up_process(worker->task); + spin_unlock_irqrestore(&worker->lock, flags); out: return 0; @@ -463,14 +610,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) worker = find_worker(workers); if (workers->ordered) { - spin_lock_irqsave(&workers->lock, flags); + /* + * you're not allowed to do ordered queues from an + * interrupt handler + */ + spin_lock(&workers->order_lock); if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { list_add_tail(&work->order_list, &workers->prio_order_list); } else { list_add_tail(&work->order_list, &workers->order_list); } - spin_unlock_irqrestore(&workers->lock, flags); + spin_unlock(&workers->order_lock); } else { INIT_LIST_HEAD(&work->order_list); } @@ -481,7 +632,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) list_add_tail(&work->list, &worker->prio_pending); else list_add_tail(&work->list, &worker->pending); - atomic_inc(&worker->num_pending); check_busy_worker(worker); /* @@ -492,10 +642,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) wake = 1; worker->working = 1; - spin_unlock_irqrestore(&worker->lock, flags); - if (wake) wake_up_process(worker->task); + spin_unlock_irqrestore(&worker->lock, flags); + out: return 0; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1b511c109db..fc089b95ec1 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -73,6 +73,15 @@ struct btrfs_workers { /* force completions in the order they were queued */ int ordered; + /* more workers required, but in an interrupt handler */ + int atomic_start_pending; + + /* + * are we allowed to sleep while starting workers or are we required + * to start them at a later time? + */ + int atomic_worker_start; + /* list with all the work threads. The workers on the idle thread * may be actively servicing jobs, but they haven't yet hit the * idle thresh limit above. @@ -90,6 +99,9 @@ struct btrfs_workers { /* lock for finding the next worker thread to queue on */ spinlock_t lock; + /* lock for the ordered lists */ + spinlock_t order_lock; + /* extra name for this worker, used for current->name */ char *name; }; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ea1ea0af8c0..82ee56bba29 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -138,6 +138,7 @@ struct btrfs_inode { * of these. */ unsigned ordered_data_close:1; + unsigned dummy_inode:1; struct inode vfs_inode; }; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9d8ba4d54a3..a11a32058b5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, */ set_page_extent_mapped(page); lock_extent(tree, last_offset, end, GFP_NOFS); - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, last_offset, PAGE_CACHE_SIZE); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em || last_offset < em->start || (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || @@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, em_tree = &BTRFS_I(inode)->extent_tree; /* we need the actual starting offset of this extent in the file */ - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, page_offset(bio->bi_io_vec->bv_page), PAGE_CACHE_SIZE); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fdcc0512d3..ec96f3a6d53 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int split; int num_doubles = 0; + l = path->nodes[0]; + slot = path->slots[0]; + if (extend && data_size + btrfs_item_size_nr(l, slot) + + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) + return -EOVERFLOW; + /* first try to make some room by pushing left and right */ if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { wret = push_leaf_right(trans, root, path, data_size, 0); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 837435ce84c..80599b4e42b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -114,6 +114,10 @@ struct btrfs_ordered_sum; */ #define BTRFS_DEV_ITEMS_OBJECTID 1ULL +#define BTRFS_BTREE_INODE_OBJECTID 1 + +#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 + /* * we can actually store much bigger names, but lets not confuse the rest * of linux @@ -670,6 +674,7 @@ struct btrfs_space_info { u64 bytes_reserved; /* total bytes the allocator has reserved for current allocations */ u64 bytes_readonly; /* total bytes that are read only */ + u64 bytes_super; /* total bytes reserved for the super blocks */ /* delalloc accounting */ u64 bytes_delalloc; /* number of bytes reserved for allocation, @@ -726,6 +731,15 @@ enum btrfs_caching_type { BTRFS_CACHE_FINISHED = 2, }; +struct btrfs_caching_control { + struct list_head list; + struct mutex mutex; + wait_queue_head_t wait; + struct btrfs_block_group_cache *block_group; + u64 progress; + atomic_t count; +}; + struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; @@ -733,6 +747,7 @@ struct btrfs_block_group_cache { spinlock_t lock; u64 pinned; u64 reserved; + u64 bytes_super; u64 flags; u64 sectorsize; int extents_thresh; @@ -742,8 +757,9 @@ struct btrfs_block_group_cache { int dirty; /* cache tracking stuff */ - wait_queue_head_t caching_q; int cached; + struct btrfs_caching_control *caching_ctl; + u64 last_byte_to_unpin; struct btrfs_space_info *space_info; @@ -782,13 +798,16 @@ struct btrfs_fs_info { /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; + + spinlock_t fs_roots_radix_lock; struct radix_tree_root fs_roots_radix; /* block group cache stuff */ spinlock_t block_group_cache_lock; struct rb_root block_group_cache_tree; - struct extent_io_tree pinned_extents; + struct extent_io_tree freed_extents[2]; + struct extent_io_tree *pinned_extents; /* logical->physical extent mapping */ struct btrfs_mapping_tree mapping_tree; @@ -822,11 +841,7 @@ struct btrfs_fs_info { struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; struct mutex chunk_mutex; - struct mutex drop_mutex; struct mutex volume_mutex; - struct mutex tree_reloc_mutex; - struct rw_semaphore extent_commit_sem; - /* * this protects the ordered operations list only while we are * processing all of the entries on it. This way we make @@ -835,10 +850,16 @@ struct btrfs_fs_info { * before jumping into the main commit. */ struct mutex ordered_operations_mutex; + struct rw_semaphore extent_commit_sem; + + struct rw_semaphore subvol_sem; + + struct srcu_struct subvol_srcu; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; + struct list_head caching_block_groups; atomic_t nr_async_submits; atomic_t async_submit_draining; @@ -996,10 +1017,12 @@ struct btrfs_root { u32 stripesize; u32 type; - u64 highest_inode; - u64 last_inode_alloc; + + u64 highest_objectid; int ref_cows; int track_dirty; + int in_radix; + u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; @@ -1920,8 +1943,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); -int btrfs_update_pinned_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int pin); +int btrfs_pin_extent(struct btrfs_root *root, + u64 bytenr, u64 num, int reserved); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, @@ -1971,9 +1994,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_io_tree *unpin); + struct btrfs_root *root); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -1984,6 +2008,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); +int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr); int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, @@ -2006,7 +2031,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, u64 bytes); void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes); -void btrfs_free_pinned_extents(struct btrfs_fs_info *info); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); @@ -2100,12 +2124,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct extent_buffer *parent); /* root-item.c */ int btrfs_find_root_ref(struct btrfs_root *tree_root, - struct btrfs_path *path, - u64 root_id, u64 ref_id); + struct btrfs_path *path, + u64 root_id, u64 ref_id); int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, - u64 root_id, u8 type, u64 ref_id, - u64 dirid, u64 sequence, + u64 root_id, u64 ref_id, u64 dirid, u64 sequence, + const char *name, int name_len); +int btrfs_del_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *tree_root, + u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, const char *name, int name_len); int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); @@ -2120,6 +2147,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct int btrfs_search_root(struct btrfs_root *root, u64 search_start, u64 *found_objectid); int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); +int btrfs_find_orphan_roots(struct btrfs_root *tree_root); int btrfs_set_root_node(struct btrfs_root_item *item, struct extent_buffer *node); /* dir-item.c */ @@ -2138,6 +2166,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 dir, u64 objectid, const char *name, int name_len, int mod); +struct btrfs_dir_item * +btrfs_search_dir_index_item(struct btrfs_root *root, + struct btrfs_path *path, u64 dirid, + const char *name, int name_len); struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); @@ -2160,6 +2192,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 offset); int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 offset); +int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); /* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, @@ -2232,6 +2265,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index); +int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, u64 objectid, + const char *name, int name_len); int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 new_size, @@ -2242,7 +2279,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, struct dentry *dentry, + struct btrfs_root *new_root, u64 new_dirid, u64 alloc_hint); int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); @@ -2258,6 +2295,7 @@ int btrfs_write_inode(struct inode *inode, int wait); void btrfs_dirty_inode(struct inode *inode); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); +void btrfs_drop_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); @@ -2275,6 +2313,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); void btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t size); +int btrfs_invalidate_inodes(struct btrfs_root *root); +extern struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -2290,7 +2330,7 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 locked_end, - u64 inline_limit, u64 *hint_block); + u64 inline_limit, u64 *hint_block, int drop_cache); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 1d70236ba00..f3a6075519c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +struct btrfs_dir_item * +btrfs_search_dir_index_item(struct btrfs_root *root, + struct btrfs_path *path, u64 dirid, + const char *name, int name_len) +{ + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; + u32 nritems; + int ret; + + key.objectid = dirid; + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ERR_PTR(ret); + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + + while (1) { + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + break; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY) + break; + + di = btrfs_match_dir_item_name(root, path, name, name_len); + if (di) + return di; + + path->slots[0]++; + } + return NULL; +} + struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c4173146bb..644e796fd64 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +static void free_fs_root(struct btrfs_root *root); static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); @@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, struct extent_map *em; int ret; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) { em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); goto out; } - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, em->block_start = 0; em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { u64 failed_start = em->start; @@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, free_extent_map(em); em = NULL; } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret) em = ERR_PTR(ret); @@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; - root->highest_inode = 0; - root->last_inode_alloc = 0; + root->highest_objectid = 0; root->name = NULL; root->in_sysfs = 0; root->inode_tree.rb_node = NULL; @@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); + if (ret > 0) + return -ENOENT; BUG_ON(ret); generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); + root->commit_root = btrfs_root_node(root); return 0; } @@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; struct extent_buffer *l; - u64 highest_inode; u64 generation; u32 blocksize; int ret = 0; @@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, kfree(root); return ERR_PTR(ret); } - goto insert; + goto out; } __setup_root(tree_root->nodesize, tree_root->leafsize, @@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); - if (ret != 0) { - if (ret > 0) - ret = -ENOENT; - goto out; + if (ret == 0) { + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); } - l = path->nodes[0]; - read_extent_buffer(l, &root->root_item, - btrfs_item_ptr_offset(l, path->slots[0]), - sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); - ret = 0; -out: - btrfs_release_path(root, path); btrfs_free_path(path); if (ret) { - kfree(root); + if (ret > 0) + ret = -ENOENT; return ERR_PTR(ret); } + generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); root->commit_root = btrfs_root_node(root); BUG_ON(!root->node); -insert: - if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { +out: + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) root->ref_cows = 1; - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; - } - } + return root; } @@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->dev_root; if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) return fs_info->csum_root; - +again: + spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); + spin_unlock(&fs_info->fs_roots_radix_lock); if (root) return root; + ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); + if (ret == 0) + ret = -ENOENT; + if (ret < 0) + return ERR_PTR(ret); + root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; + WARN_ON(btrfs_root_refs(&root->root_item) == 0); set_anon_super(&root->anon_super, NULL); + ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); + if (ret) + goto fail; + + spin_lock(&fs_info->fs_roots_radix_lock); ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); + if (ret == 0) + root->in_radix = 1; + spin_unlock(&fs_info->fs_roots_radix_lock); + radix_tree_preload_end(); if (ret) { - free_extent_buffer(root->node); - kfree(root); - return ERR_PTR(ret); + if (ret == -EEXIST) { + free_fs_root(root); + goto again; + } + goto fail; } - if (!(fs_info->sb->s_flags & MS_RDONLY)) { - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid); - BUG_ON(ret); + + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid); + WARN_ON(ret); + + if (!(fs_info->sb->s_flags & MS_RDONLY)) btrfs_orphan_cleanup(root); - } + return root; +fail: + free_fs_root(root); + return ERR_PTR(ret); } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen) { + return btrfs_read_fs_root_no_name(fs_info, location); +#if 0 struct btrfs_root *root; int ret; @@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } -#if 0 + ret = btrfs_sysfs_add_root(root); if (ret) { free_extent_buffer(root->node); @@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } -#endif root->in_sysfs = 1; return root; +#endif } static int btrfs_congested_fn(void *congested_data, int bdi_bits) @@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em) { __unplug_io_fn(bdi, page); return; @@ -1360,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) err = bdi_register(bdi, NULL, "btrfs-%d", atomic_inc_return(&btrfs_bdi_num)); - if (err) + if (err) { + bdi_destroy(bdi); return err; + } bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->unplug_io_fn = btrfs_unplug_io_fn; @@ -1451,9 +1472,12 @@ static int cleaner_kthread(void *arg) break; vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); - mutex_lock(&root->fs_info->cleaner_mutex); - btrfs_clean_old_snapshots(root); - mutex_unlock(&root->fs_info->cleaner_mutex); + + if (!(root->fs_info->sb->s_flags & MS_RDONLY) && + mutex_trylock(&root->fs_info->cleaner_mutex)) { + btrfs_clean_old_snapshots(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + } if (freezing(current)) { refrigerator(); @@ -1558,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } - INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); + + ret = init_srcu_struct(&fs_info->subvol_srcu); + if (ret) { + err = ret; + goto fail; + } + + ret = setup_bdi(fs_info, &fs_info->bdi); + if (ret) { + err = ret; + goto fail_srcu; + } + + fs_info->btree_inode = new_inode(sb); + if (!fs_info->btree_inode) { + err = -ENOMEM; + goto fail_bdi; + } + + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); INIT_LIST_HEAD(&fs_info->ordered_operations); + INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); + spin_lock_init(&fs_info->fs_roots_radix_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -1585,11 +1630,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - if (setup_bdi(fs_info, &fs_info->bdi)) - goto fail_bdi; - fs_info->btree_inode = new_inode(sb); - fs_info->btree_inode->i_ino = 1; - fs_info->btree_inode->i_nlink = 1; fs_info->metadata_ratio = 8; fs_info->thread_pool_size = min_t(unsigned long, @@ -1602,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, sb->s_blocksize_bits = blksize_bits(4096); sb->s_bdi = &fs_info->bdi; + fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; + fs_info->btree_inode->i_nlink = 1; /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of @@ -1620,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; + BTRFS_I(fs_info->btree_inode)->root = tree_root; + memset(&BTRFS_I(fs_info->btree_inode)->location, 0, + sizeof(struct btrfs_key)); + BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; + insert_inode_hash(fs_info->btree_inode); + spin_lock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree.rb_node = NULL; - extent_io_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->freed_extents[0], fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_io_tree_init(&fs_info->freed_extents[1], + fs_info->btree_inode->i_mapping, GFP_NOFS); + fs_info->pinned_extents = &fs_info->freed_extents[0]; fs_info->do_barriers = 1; - BTRFS_I(fs_info->btree_inode)->root = tree_root; - memset(&BTRFS_I(fs_info->btree_inode)->location, 0, - sizeof(struct btrfs_key)); - insert_inode_hash(fs_info->btree_inode); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); - mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); - mutex_init(&fs_info->tree_reloc_mutex); init_rwsem(&fs_info->extent_commit_sem); + init_rwsem(&fs_info->subvol_sem); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -1700,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -EINVAL; goto fail_iput; } - +printk("thread pool is %d\n", fs_info->thread_pool_size); /* * we need to start all the end_io workers up front because the * queue work function gets called at interrupt time, and so it @@ -1745,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->endio_workers.idle_thresh = 4; fs_info->endio_meta_workers.idle_thresh = 4; - fs_info->endio_write_workers.idle_thresh = 64; - fs_info->endio_meta_write_workers.idle_thresh = 64; + fs_info->endio_write_workers.idle_thresh = 2; + fs_info->endio_meta_write_workers.idle_thresh = 2; + + fs_info->endio_workers.atomic_worker_start = 1; + fs_info->endio_meta_workers.atomic_worker_start = 1; + fs_info->endio_write_workers.atomic_worker_start = 1; + fs_info->endio_meta_write_workers.atomic_worker_start = 1; btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->delalloc_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); - btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_meta_workers, - fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_meta_write_workers, - fs_info->thread_pool_size); - btrfs_start_workers(&fs_info->endio_write_workers, - fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_workers, 1); + btrfs_start_workers(&fs_info->endio_meta_workers, 1); + btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); + btrfs_start_workers(&fs_info->endio_write_workers, 1); fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, @@ -1918,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, } } + ret = btrfs_find_orphan_roots(tree_root); + BUG_ON(ret); + if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_recover_relocation(tree_root); BUG_ON(ret); @@ -1977,6 +2028,8 @@ fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); fail_bdi: bdi_destroy(&fs_info->bdi); +fail_srcu: + cleanup_srcu_struct(&fs_info->subvol_srcu); fail: kfree(extent_root); kfree(tree_root); @@ -2236,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans, int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { - WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); + spin_lock(&fs_info->fs_roots_radix_lock); radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + spin_unlock(&fs_info->fs_roots_radix_lock); + + if (btrfs_root_refs(&root->root_item) == 0) + synchronize_srcu(&fs_info->subvol_srcu); + + free_fs_root(root); + return 0; +} + +static void free_fs_root(struct btrfs_root *root) +{ + WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); if (root->anon_super.s_dev) { down_write(&root->anon_super.s_umount); kill_anon_super(&root->anon_super); } - if (root->node) - free_extent_buffer(root->node); - if (root->commit_root) - free_extent_buffer(root->commit_root); + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); kfree(root->name); kfree(root); - return 0; } static int del_fs_roots(struct btrfs_fs_info *fs_info) @@ -2258,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) struct btrfs_root *gang[8]; int i; + while (!list_empty(&fs_info->dead_roots)) { + gang[0] = list_entry(fs_info->dead_roots.next, + struct btrfs_root, root_list); + list_del(&gang[0]->root_list); + + if (gang[0]->in_radix) { + btrfs_free_fs_root(fs_info, gang[0]); + } else { + free_extent_buffer(gang[0]->node); + free_extent_buffer(gang[0]->commit_root); + kfree(gang[0]); + } + } + while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, @@ -2287,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) root_objectid = gang[ret - 1]->root_key.objectid + 1; for (i = 0; i < ret; i++) { root_objectid = gang[i]->root_key.objectid; - ret = btrfs_find_dead_roots(fs_info->tree_root, - root_objectid); - BUG_ON(ret); btrfs_orphan_cleanup(gang[i]); } root_objectid++; @@ -2359,7 +2432,6 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->csum_root->commit_root); btrfs_free_block_groups(root->fs_info); - btrfs_free_pinned_extents(root->fs_info); del_fs_roots(fs_info); @@ -2378,6 +2450,7 @@ int close_ctree(struct btrfs_root *root) btrfs_mapping_tree_free(&fs_info->mapping_tree); bdi_destroy(&fs_info->bdi); + cleanup_srcu_struct(&fs_info->subvol_srcu); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9596b40caa4..ba5c3fd5ab8 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, len = BTRFS_FID_SIZE_NON_CONNECTABLE; type = FILEID_BTRFS_WITHOUT_PARENT; - fid->objectid = BTRFS_I(inode)->location.objectid; + fid->objectid = inode->i_ino; fid->root_objectid = BTRFS_I(inode)->root->objectid; fid->gen = inode->i_generation; @@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, } static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, - u64 root_objectid, u32 generation) + u64 root_objectid, u32 generation, + int check_generation) { + struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; struct btrfs_root *root; + struct dentry *dentry; struct inode *inode; struct btrfs_key key; + int index; + int err = 0; + + if (objectid < BTRFS_FIRST_FREE_OBJECTID) + return ERR_PTR(-ESTALE); key.objectid = root_objectid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = (u64)-1; - root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); - if (IS_ERR(root)) - return ERR_CAST(root); + index = srcu_read_lock(&fs_info->subvol_srcu); + + root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto fail; + } + + if (btrfs_root_refs(&root->root_item) == 0) { + err = -ENOENT; + goto fail; + } key.objectid = objectid; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; inode = btrfs_iget(sb, &key, root); - if (IS_ERR(inode)) - return (void *)inode; + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto fail; + } + + srcu_read_unlock(&fs_info->subvol_srcu, index); - if (generation != inode->i_generation) { + if (check_generation && generation != inode->i_generation) { iput(inode); return ERR_PTR(-ESTALE); } - return d_obtain_alias(inode); + dentry = d_obtain_alias(inode); + if (!IS_ERR(dentry)) + dentry->d_op = &btrfs_dentry_operations; + return dentry; +fail: + srcu_read_unlock(&fs_info->subvol_srcu, index); + return ERR_PTR(err); } static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, @@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, objectid = fid->parent_objectid; generation = fid->parent_gen; - return btrfs_get_dentry(sb, objectid, root_objectid, generation); + return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); } static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, @@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, root_objectid = fid->root_objectid; generation = fid->gen; - return btrfs_get_dentry(sb, objectid, root_objectid, generation); + return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); } static struct dentry *btrfs_get_parent(struct dentry *child) { struct inode *dir = child->d_inode; + static struct dentry *dentry; struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_key key; struct btrfs_path *path; struct extent_buffer *leaf; - int slot; - u64 objectid; + struct btrfs_root_ref *ref; + struct btrfs_key key; + struct btrfs_key found_key; int ret; path = btrfs_alloc_path(); - key.objectid = dir->i_ino; - btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); - key.offset = (u64)-1; + if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + key.objectid = root->root_key.objectid; + key.type = BTRFS_ROOT_BACKREF_KEY; + key.offset = (u64)-1; + root = root->fs_info->tree_root; + } else { + key.objectid = dir->i_ino; + key.type = BTRFS_INODE_REF_KEY; + key.offset = (u64)-1; + } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - /* Error */ - btrfs_free_path(path); - return ERR_PTR(ret); + if (ret < 0) + goto fail; + + BUG_ON(ret == 0); + if (path->slots[0] == 0) { + ret = -ENOENT; + goto fail; } + + path->slots[0]--; leaf = path->nodes[0]; - slot = path->slots[0]; - if (ret) { - /* btrfs_search_slot() returns the slot where we'd want to - insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. - The _real_ backref, telling us what the parent inode - _actually_ is, will be in the slot _before_ the one - that btrfs_search_slot() returns. */ - if (!slot) { - /* Unless there is _no_ key in the tree before... */ - btrfs_free_path(path); - return ERR_PTR(-EIO); - } - slot--; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != key.objectid || found_key.type != key.type) { + ret = -ENOENT; + goto fail; } - btrfs_item_key_to_cpu(leaf, &key, slot); + if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); + key.objectid = btrfs_root_ref_dirid(leaf, ref); + } else { + key.objectid = found_key.offset; + } btrfs_free_path(path); - if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) - return ERR_PTR(-EINVAL); - - objectid = key.offset; - - /* If we are already at the root of a subvol, return the real root */ - if (objectid == dir->i_ino) - return dget(dir->i_sb->s_root); + if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { + return btrfs_get_dentry(root->fs_info->sb, key.objectid, + found_key.offset, 0, 0); + } - /* Build a new key for the inode item */ - key.objectid = objectid; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - - return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); + dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); + if (!IS_ERR(dentry)) + dentry->d_op = &btrfs_dentry_operations; + return dentry; +fail: + btrfs_free_path(path); + return ERR_PTR(ret); } const struct export_operations btrfs_export_ops = { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 535f85ba104..993f93ff7ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -32,12 +32,12 @@ #include "locking.h" #include "free-space-cache.h" -static int update_reserved_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int reserve); static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc, int mark_free); +static int update_reserved_extents(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, int level, struct btrfs_key *ins); - static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags, int force); +static int pin_down_bytes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr, u64 num_bytes, + int is_data, int reserved, + struct extent_buffer **must_clean); +static int find_next_key(struct btrfs_path *path, int level, + struct btrfs_key *key); static noinline int block_group_cache_done(struct btrfs_block_group_cache *cache) @@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, return ret; } -/* - * We always set EXTENT_LOCKED for the super mirror extents so we don't - * overwrite them, so those bits need to be unset. Also, if we are unmounting - * with pinned extents still sitting there because we had a block group caching, - * we need to clear those now, since we are done. - */ -void btrfs_free_pinned_extents(struct btrfs_fs_info *info) +static int add_excluded_extent(struct btrfs_root *root, + u64 start, u64 num_bytes) { - u64 start, end, last = 0; - int ret; + u64 end = start + num_bytes - 1; + set_extent_bits(&root->fs_info->freed_extents[0], + start, end, EXTENT_UPTODATE, GFP_NOFS); + set_extent_bits(&root->fs_info->freed_extents[1], + start, end, EXTENT_UPTODATE, GFP_NOFS); + return 0; +} - while (1) { - ret = find_first_extent_bit(&info->pinned_extents, last, - &start, &end, - EXTENT_LOCKED|EXTENT_DIRTY); - if (ret) - break; +static void free_excluded_extents(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) +{ + u64 start, end; - clear_extent_bits(&info->pinned_extents, start, end, - EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); - last = end+1; - } + start = cache->key.objectid; + end = start + cache->key.offset - 1; + + clear_extent_bits(&root->fs_info->freed_extents[0], + start, end, EXTENT_UPTODATE, GFP_NOFS); + clear_extent_bits(&root->fs_info->freed_extents[1], + start, end, EXTENT_UPTODATE, GFP_NOFS); } -static int remove_sb_from_cache(struct btrfs_root *root, - struct btrfs_block_group_cache *cache) +static int exclude_super_stripes(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) { - struct btrfs_fs_info *fs_info = root->fs_info; u64 bytenr; u64 *logical; int stripe_len; @@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, cache->key.objectid, bytenr, 0, &logical, &nr, &stripe_len); BUG_ON(ret); + while (nr--) { - try_lock_extent(&fs_info->pinned_extents, - logical[nr], - logical[nr] + stripe_len - 1, GFP_NOFS); + cache->bytes_super += stripe_len; + ret = add_excluded_extent(root, logical[nr], + stripe_len); + BUG_ON(ret); } + kfree(logical); } - return 0; } +static struct btrfs_caching_control * +get_caching_control(struct btrfs_block_group_cache *cache) +{ + struct btrfs_caching_control *ctl; + + spin_lock(&cache->lock); + if (cache->cached != BTRFS_CACHE_STARTED) { + spin_unlock(&cache->lock); + return NULL; + } + + ctl = cache->caching_ctl; + atomic_inc(&ctl->count); + spin_unlock(&cache->lock); + return ctl; +} + +static void put_caching_control(struct btrfs_caching_control *ctl) +{ + if (atomic_dec_and_test(&ctl->count)) + kfree(ctl); +} + /* * this is only called by cache_block_group, since we could have freed extents * we need to check the pinned_extents for any extents that can't be used yet @@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, int ret; while (start < end) { - ret = find_first_extent_bit(&info->pinned_extents, start, + ret = find_first_extent_bit(info->pinned_extents, start, &extent_start, &extent_end, - EXTENT_DIRTY|EXTENT_LOCKED); + EXTENT_DIRTY | EXTENT_UPTODATE); if (ret) break; @@ -249,22 +281,27 @@ static int caching_kthread(void *data) { struct btrfs_block_group_cache *block_group = data; struct btrfs_fs_info *fs_info = block_group->fs_info; - u64 last = 0; + struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; + struct btrfs_root *extent_root = fs_info->extent_root; struct btrfs_path *path; - int ret = 0; - struct btrfs_key key; struct extent_buffer *leaf; - int slot; + struct btrfs_key key; u64 total_found = 0; - - BUG_ON(!fs_info); + u64 last = 0; + u32 nritems; + int ret = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - atomic_inc(&block_group->space_info->caching_threads); + exclude_super_stripes(extent_root, block_group); + spin_lock(&block_group->space_info->lock); + block_group->space_info->bytes_super += block_group->bytes_super; + spin_unlock(&block_group->space_info->lock); + last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); + /* * We don't want to deadlock with somebody trying to allocate a new * extent for the extent root while also trying to search the extent @@ -277,74 +314,64 @@ static int caching_kthread(void *data) key.objectid = last; key.offset = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + key.type = BTRFS_EXTENT_ITEM_KEY; again: + mutex_lock(&caching_ctl->mutex); /* need to make sure the commit_root doesn't disappear */ down_read(&fs_info->extent_commit_sem); - ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) goto err; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + while (1) { smp_mb(); - if (block_group->fs_info->closing > 1) { + if (fs_info->closing > 1) { last = (u64)-1; break; } - leaf = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(leaf)) { - ret = btrfs_next_leaf(fs_info->extent_root, path); - if (ret < 0) - goto err; - else if (ret) + if (path->slots[0] < nritems) { + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + } else { + ret = find_next_key(path, 0, &key); + if (ret) break; - if (need_resched() || - btrfs_transaction_in_commit(fs_info)) { - leaf = path->nodes[0]; - - /* this shouldn't happen, but if the - * leaf is empty just move on. - */ - if (btrfs_header_nritems(leaf) == 0) - break; - /* - * we need to copy the key out so that - * we are sure the next search advances - * us forward in the btree. - */ - btrfs_item_key_to_cpu(leaf, &key, 0); - btrfs_release_path(fs_info->extent_root, path); - up_read(&fs_info->extent_commit_sem); + caching_ctl->progress = last; + btrfs_release_path(extent_root, path); + up_read(&fs_info->extent_commit_sem); + mutex_unlock(&caching_ctl->mutex); + if (btrfs_transaction_in_commit(fs_info)) schedule_timeout(1); - goto again; - } + else + cond_resched(); + goto again; + } + if (key.objectid < block_group->key.objectid) { + path->slots[0]++; continue; } - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid < block_group->key.objectid) - goto next; if (key.objectid >= block_group->key.objectid + block_group->key.offset) break; - if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY) { total_found += add_new_free_space(block_group, fs_info, last, key.objectid); last = key.objectid + key.offset; - } - if (total_found > (1024 * 1024 * 2)) { - total_found = 0; - wake_up(&block_group->caching_q); + if (total_found > (1024 * 1024 * 2)) { + total_found = 0; + wake_up(&caching_ctl->wait); + } } -next: path->slots[0]++; } ret = 0; @@ -352,33 +379,65 @@ next: total_found += add_new_free_space(block_group, fs_info, last, block_group->key.objectid + block_group->key.offset); + caching_ctl->progress = (u64)-1; spin_lock(&block_group->lock); + block_group->caching_ctl = NULL; block_group->cached = BTRFS_CACHE_FINISHED; spin_unlock(&block_group->lock); err: btrfs_free_path(path); up_read(&fs_info->extent_commit_sem); - atomic_dec(&block_group->space_info->caching_threads); - wake_up(&block_group->caching_q); + free_excluded_extents(extent_root, block_group); + + mutex_unlock(&caching_ctl->mutex); + wake_up(&caching_ctl->wait); + + put_caching_control(caching_ctl); + atomic_dec(&block_group->space_info->caching_threads); return 0; } static int cache_block_group(struct btrfs_block_group_cache *cache) { + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_caching_control *caching_ctl; struct task_struct *tsk; int ret = 0; + smp_mb(); + if (cache->cached != BTRFS_CACHE_NO) + return 0; + + caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); + BUG_ON(!caching_ctl); + + INIT_LIST_HEAD(&caching_ctl->list); + mutex_init(&caching_ctl->mutex); + init_waitqueue_head(&caching_ctl->wait); + caching_ctl->block_group = cache; + caching_ctl->progress = cache->key.objectid; + /* one for caching kthread, one for caching block group list */ + atomic_set(&caching_ctl->count, 2); + spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { spin_unlock(&cache->lock); - return ret; + kfree(caching_ctl); + return 0; } + cache->caching_ctl = caching_ctl; cache->cached = BTRFS_CACHE_STARTED; spin_unlock(&cache->lock); + down_write(&fs_info->extent_commit_sem); + list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); + up_write(&fs_info->extent_commit_sem); + + atomic_inc(&cache->space_info->caching_threads); + tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", cache->key.objectid); if (IS_ERR(tsk)) { @@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, parent, ref_root, flags, ref->objectid, ref->offset, &ins, node->ref_mod); - update_reserved_extents(root, ins.objectid, ins.offset, 0); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, node->num_bytes, parent, @@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, extent_op->flags_to_set, &extent_op->key, ref->level, &ins); - update_reserved_extents(root, ins.objectid, ins.offset, 0); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, node->num_bytes, parent, ref_root, @@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, BUG_ON(extent_op); head = btrfs_delayed_node_to_head(node); if (insert_reserved) { + int mark_free = 0; + struct extent_buffer *must_clean = NULL; + + ret = pin_down_bytes(trans, root, NULL, + node->bytenr, node->num_bytes, + head->is_data, 1, &must_clean); + if (ret > 0) + mark_free = 1; + + if (must_clean) { + clean_tree_block(NULL, root, must_clean); + btrfs_tree_unlock(must_clean); + free_extent_buffer(must_clean); + } if (head->is_data) { ret = btrfs_del_csums(trans, root, node->bytenr, node->num_bytes); BUG_ON(ret); } - btrfs_update_pinned_extents(root, node->bytenr, - node->num_bytes, 1); - update_reserved_extents(root, node->bytenr, - node->num_bytes, 0); + if (mark_free) { + ret = btrfs_free_reserved_extent(root, + node->bytenr, + node->num_bytes); + BUG_ON(ret); + } } mutex_unlock(&head->mutex); return 0; @@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root) /* get the space info for where the metadata will live */ alloc_target = btrfs_get_alloc_profile(root, 0); meta_sinfo = __find_space_info(info, alloc_target); + if (!meta_sinfo) + goto alloc; again: spin_lock(&meta_sinfo->lock); @@ -2717,12 +2792,13 @@ again: do_div(thresh, 100); if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + - meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { + meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + + meta_sinfo->bytes_super > thresh) { struct btrfs_trans_handle *trans; if (!meta_sinfo->full) { meta_sinfo->force_alloc = 1; spin_unlock(&meta_sinfo->lock); - +alloc: trans = btrfs_start_transaction(root, 1); if (!trans) return -ENOMEM; @@ -2730,6 +2806,10 @@ again: ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2 * 1024 * 1024, alloc_target, 0); btrfs_end_transaction(trans, root); + if (!meta_sinfo) { + meta_sinfo = __find_space_info(info, + alloc_target); + } goto again; } spin_unlock(&meta_sinfo->lock); @@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); data_sinfo = BTRFS_I(inode)->space_info; + if (!data_sinfo) + goto alloc; + again: /* make sure we have enough space to handle the data first */ spin_lock(&data_sinfo->lock); if (data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - - data_sinfo->bytes_may_use < bytes) { + data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { struct btrfs_trans_handle *trans; /* @@ -2783,7 +2866,7 @@ again: data_sinfo->force_alloc = 1; spin_unlock(&data_sinfo->lock); - +alloc: alloc_target = btrfs_get_alloc_profile(root, 1); trans = btrfs_start_transaction(root, 1); if (!trans) @@ -2795,6 +2878,11 @@ again: btrfs_end_transaction(trans, root); if (ret) return ret; + + if (!data_sinfo) { + btrfs_set_inode_space_info(root, inode); + data_sinfo = BTRFS_I(inode)->space_info; + } goto again; } spin_unlock(&data_sinfo->lock); @@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { old_val += num_bytes; + btrfs_set_block_group_used(&cache->item, old_val); + cache->reserved -= num_bytes; cache->space_info->bytes_used += num_bytes; + cache->space_info->bytes_reserved -= num_bytes; if (cache->ro) cache->space_info->bytes_readonly -= num_bytes; - btrfs_set_block_group_used(&cache->item, old_val); spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); } else { @@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) return bytenr; } -int btrfs_update_pinned_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int pin) +/* + * this function must be called within transaction + */ +int btrfs_pin_extent(struct btrfs_root *root, + u64 bytenr, u64 num_bytes, int reserved) { - u64 len; - struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache; - if (pin) - set_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + num - 1, GFP_NOFS); - - while (num > 0) { - cache = btrfs_lookup_block_group(fs_info, bytenr); - BUG_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); - if (pin) { - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - cache->pinned += len; - cache->space_info->bytes_pinned += len; - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); - fs_info->total_pinned += len; - } else { - int unpin = 0; + cache = btrfs_lookup_block_group(fs_info, bytenr); + BUG_ON(!cache); - /* - * in order to not race with the block group caching, we - * only want to unpin the extent if we are cached. If - * we aren't cached, we want to start async caching this - * block group so we can free the extent the next time - * around. - */ - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - unpin = (cache->cached == BTRFS_CACHE_FINISHED); - if (likely(unpin)) { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; - fs_info->total_pinned -= len; - } - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + cache->pinned += num_bytes; + cache->space_info->bytes_pinned += num_bytes; + if (reserved) { + cache->reserved -= num_bytes; + cache->space_info->bytes_reserved -= num_bytes; + } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); - if (likely(unpin)) - clear_extent_dirty(&fs_info->pinned_extents, - bytenr, bytenr + len -1, - GFP_NOFS); - else - cache_block_group(cache); + btrfs_put_block_group(cache); - if (unpin) - btrfs_add_free_space(cache, bytenr, len); - } - btrfs_put_block_group(cache); - bytenr += len; - num -= len; + set_extent_dirty(fs_info->pinned_extents, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); + return 0; +} + +static int update_reserved_extents(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve) +{ + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + if (reserve) { + cache->reserved += num_bytes; + cache->space_info->bytes_reserved += num_bytes; + } else { + cache->reserved -= num_bytes; + cache->space_info->bytes_reserved -= num_bytes; } + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); return 0; } -static int update_reserved_extents(struct btrfs_root *root, - u64 bytenr, u64 num, int reserve) +int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { - u64 len; - struct btrfs_block_group_cache *cache; struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_caching_control *next; + struct btrfs_caching_control *caching_ctl; + struct btrfs_block_group_cache *cache; - while (num > 0) { - cache = btrfs_lookup_block_group(fs_info, bytenr); - BUG_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); + down_write(&fs_info->extent_commit_sem); - spin_lock(&cache->space_info->lock); - spin_lock(&cache->lock); - if (reserve) { - cache->reserved += len; - cache->space_info->bytes_reserved += len; + list_for_each_entry_safe(caching_ctl, next, + &fs_info->caching_block_groups, list) { + cache = caching_ctl->block_group; + if (block_group_cache_done(cache)) { + cache->last_byte_to_unpin = (u64)-1; + list_del_init(&caching_ctl->list); + put_caching_control(caching_ctl); } else { - cache->reserved -= len; - cache->space_info->bytes_reserved -= len; + cache->last_byte_to_unpin = caching_ctl->progress; } - spin_unlock(&cache->lock); - spin_unlock(&cache->space_info->lock); - btrfs_put_block_group(cache); - bytenr += len; - num -= len; } + + if (fs_info->pinned_extents == &fs_info->freed_extents[0]) + fs_info->pinned_extents = &fs_info->freed_extents[1]; + else + fs_info->pinned_extents = &fs_info->freed_extents[0]; + + up_write(&fs_info->extent_commit_sem); return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) +static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { - u64 last = 0; - u64 start; - u64 end; - struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; - int ret; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache = NULL; + u64 len; - while (1) { - ret = find_first_extent_bit(pinned_extents, last, - &start, &end, EXTENT_DIRTY); - if (ret) - break; + while (start <= end) { + if (!cache || + start >= cache->key.objectid + cache->key.offset) { + if (cache) + btrfs_put_block_group(cache); + cache = btrfs_lookup_block_group(fs_info, start); + BUG_ON(!cache); + } + + len = cache->key.objectid + cache->key.offset - start; + len = min(len, end + 1 - start); - set_extent_dirty(copy, start, end, GFP_NOFS); - last = end + 1; + if (start < cache->last_byte_to_unpin) { + len = min(len, cache->last_byte_to_unpin - start); + btrfs_add_free_space(cache, start, len); + } + + spin_lock(&cache->space_info->lock); + spin_lock(&cache->lock); + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + spin_unlock(&cache->lock); + spin_unlock(&cache->space_info->lock); + + start += len; } + + if (cache) + btrfs_put_block_group(cache); return 0; } int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_io_tree *unpin) + struct btrfs_root *root) { + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_io_tree *unpin; u64 start; u64 end; int ret; + if (fs_info->pinned_extents == &fs_info->freed_extents[0]) + unpin = &fs_info->freed_extents[1]; + else + unpin = &fs_info->freed_extents[0]; + while (1) { ret = find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY); @@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ret = btrfs_discard_extent(root, start, end + 1 - start); - /* unlocks the pinned mutex */ - btrfs_update_pinned_extents(root, start, end + 1 - start, 0); clear_extent_dirty(unpin, start, end, GFP_NOFS); - + unpin_extent_range(root, start, end); cond_resched(); } @@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, static int pin_down_bytes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - u64 bytenr, u64 num_bytes, int is_data, + u64 bytenr, u64 num_bytes, + int is_data, int reserved, struct extent_buffer **must_clean) { int err = 0; @@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, } free_extent_buffer(buf); pinit: - btrfs_set_path_blocking(path); + if (path) + btrfs_set_path_blocking(path); /* unlocks the pinned mutex */ - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); + btrfs_pin_extent(root, bytenr, num_bytes, reserved); BUG_ON(err < 0); return 0; } - static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } ret = pin_down_bytes(trans, root, path, bytenr, - num_bytes, is_data, &must_clean); + num_bytes, is_data, 0, &must_clean); if (ret > 0) mark_free = 1; BUG_ON(ret < 0); @@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); /* unlocks the pinned mutex */ - btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); - update_reserved_extents(root, bytenr, num_bytes, 0); + btrfs_pin_extent(root, bytenr, num_bytes, 1); ret = 0; } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, @@ -3585,19 +3682,33 @@ static noinline int wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, u64 num_bytes) { + struct btrfs_caching_control *caching_ctl; DEFINE_WAIT(wait); - prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); - - if (block_group_cache_done(cache)) { - finish_wait(&cache->caching_q, &wait); + caching_ctl = get_caching_control(cache); + if (!caching_ctl) return 0; - } - schedule(); - finish_wait(&cache->caching_q, &wait); - wait_event(cache->caching_q, block_group_cache_done(cache) || + wait_event(caching_ctl->wait, block_group_cache_done(cache) || (cache->free_space >= num_bytes)); + + put_caching_control(caching_ctl); + return 0; +} + +static noinline int +wait_block_group_cache_done(struct btrfs_block_group_cache *cache) +{ + struct btrfs_caching_control *caching_ctl; + DEFINE_WAIT(wait); + + caching_ctl = get_caching_control(cache); + if (!caching_ctl) + return 0; + + wait_event(caching_ctl->wait, block_group_cache_done(cache)); + + put_caching_control(caching_ctl); return 0; } @@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, int last_ptr_loop = 0; int loop = 0; bool found_uncached_bg = false; + bool failed_cluster_refill = false; WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); @@ -3732,7 +3844,16 @@ have_block_group: if (unlikely(block_group->ro)) goto loop; - if (last_ptr) { + /* + * Ok we want to try and use the cluster allocator, so lets look + * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will + * have tried the cluster allocator plenty of times at this + * point and not have found anything, so we are likely way too + * fragmented for the clustering stuff to find anything, so lets + * just skip it and let the allocator find whatever block it can + * find + */ + if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { /* * the refill lock keeps out other * people trying to start a new cluster @@ -3807,9 +3928,11 @@ refill_cluster: spin_unlock(&last_ptr->refill_lock); goto checks; } - } else if (!cached && loop > LOOP_CACHING_NOWAIT) { + } else if (!cached && loop > LOOP_CACHING_NOWAIT + && !failed_cluster_refill) { spin_unlock(&last_ptr->refill_lock); + failed_cluster_refill = true; wait_block_group_cache_progress(block_group, num_bytes + empty_cluster + empty_size); goto have_block_group; @@ -3821,13 +3944,9 @@ refill_cluster: * cluster. Free the cluster we've been trying * to use, and go to the next block group */ - if (loop < LOOP_NO_EMPTY_SIZE) { - btrfs_return_cluster_to_free_space(NULL, - last_ptr); - spin_unlock(&last_ptr->refill_lock); - goto loop; - } + btrfs_return_cluster_to_free_space(NULL, last_ptr); spin_unlock(&last_ptr->refill_lock); + goto loop; } offset = btrfs_find_space_for_alloc(block_group, search_start, @@ -3881,9 +4000,12 @@ checks: search_start - offset); BUG_ON(offset > search_start); + update_reserved_extents(block_group, num_bytes, 1); + /* we are all good, lets return */ break; loop: + failed_cluster_refill = false; btrfs_put_block_group(block_group); } up_read(&space_info->groups_sem); @@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) up_read(&info->groups_sem); } -static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, - u64 data) +int btrfs_reserve_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 num_bytes, u64 min_alloc_size, + u64 empty_size, u64 hint_byte, + u64 search_end, struct btrfs_key *ins, + u64 data) { int ret; u64 search_start = 0; @@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) ret = btrfs_discard_extent(root, start, len); btrfs_add_free_space(cache, start, len); + update_reserved_extents(cache, len, 0); btrfs_put_block_group(cache); - update_reserved_extents(root, start, len, 0); - - return ret; -} - -int btrfs_reserve_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 num_bytes, u64 min_alloc_size, - u64 empty_size, u64 hint_byte, - u64 search_end, struct btrfs_key *ins, - u64 data) -{ - int ret; - ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, - empty_size, hint_byte, search_end, ins, - data); - if (!ret) - update_reserved_extents(root, ins->objectid, ins->offset, 1); return ret; } @@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, { int ret; struct btrfs_block_group_cache *block_group; + struct btrfs_caching_control *caching_ctl; + u64 start = ins->objectid; + u64 num_bytes = ins->offset; block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); cache_block_group(block_group); - wait_event(block_group->caching_q, - block_group_cache_done(block_group)); + caching_ctl = get_caching_control(block_group); - ret = btrfs_remove_free_space(block_group, ins->objectid, - ins->offset); - BUG_ON(ret); + if (!caching_ctl) { + BUG_ON(!block_group_cache_done(block_group)); + ret = btrfs_remove_free_space(block_group, start, num_bytes); + BUG_ON(ret); + } else { + mutex_lock(&caching_ctl->mutex); + + if (start >= caching_ctl->progress) { + ret = add_excluded_extent(root, start, num_bytes); + BUG_ON(ret); + } else if (start + num_bytes <= caching_ctl->progress) { + ret = btrfs_remove_free_space(block_group, + start, num_bytes); + BUG_ON(ret); + } else { + num_bytes = caching_ctl->progress - start; + ret = btrfs_remove_free_space(block_group, + start, num_bytes); + BUG_ON(ret); + + start = caching_ctl->progress; + num_bytes = ins->objectid + ins->offset - + caching_ctl->progress; + ret = add_excluded_extent(root, start, num_bytes); + BUG_ON(ret); + } + + mutex_unlock(&caching_ctl->mutex); + put_caching_control(caching_ctl); + } + + update_reserved_extents(block_group, ins->offset, 1); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); @@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, int ret; u64 flags = 0; - ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, - empty_size, hint_byte, search_end, - ins, 0); + ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, + empty_size, hint_byte, search_end, + ins, 0); if (ret) return ret; @@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, } else BUG_ON(parent > 0); - update_reserved_extents(root, ins->objectid, ins->offset, 1); if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { struct btrfs_delayed_extent_op *extent_op; extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); @@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } -#if 0 -int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *leaf) -{ - u64 disk_bytenr; - u64 num_bytes; - struct btrfs_key key; - struct btrfs_file_extent_item *fi; - u32 nritems; - int i; - int ret; - - BUG_ON(!btrfs_is_leaf(leaf)); - nritems = btrfs_header_nritems(leaf); - - for (i = 0; i < nritems; i++) { - cond_resched(); - btrfs_item_key_to_cpu(leaf, &key, i); - - /* only extents have references, skip everything else */ - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - continue; - - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - - /* inline extents live in the btree, they don't have refs */ - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - - disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - - /* holes don't have refs */ - if (disk_bytenr == 0) - continue; - - num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); - ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, - leaf->start, 0, key.objectid, 0); - BUG_ON(ret); - } - return 0; -} - -static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_leaf_ref *ref) -{ - int i; - int ret; - struct btrfs_extent_info *info; - struct refsort *sorted; - - if (ref->nritems == 0) - return 0; - - sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); - for (i = 0; i < ref->nritems; i++) { - sorted[i].bytenr = ref->extents[i].bytenr; - sorted[i].slot = i; - } - sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); - - /* - * the items in the ref were sorted when the ref was inserted - * into the ref cache, so this is already in order - */ - for (i = 0; i < ref->nritems; i++) { - info = ref->extents + sorted[i].slot; - ret = btrfs_free_extent(trans, root, info->bytenr, - info->num_bytes, ref->bytenr, - ref->owner, ref->generation, - info->objectid, 0); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - - BUG_ON(ret); - info++; - } - - kfree(sorted); - return 0; -} - - -static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 start, - u64 len, u32 *refs) -{ - int ret; - - ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); - BUG_ON(ret); - -#if 0 /* some debugging code in case we see problems here */ - /* if the refs count is one, it won't get increased again. But - * if the ref count is > 1, someone may be decreasing it at - * the same time we are. - */ - if (*refs != 1) { - struct extent_buffer *eb = NULL; - eb = btrfs_find_create_tree_block(root, start, len); - if (eb) - btrfs_tree_lock(eb); - - mutex_lock(&root->fs_info->alloc_mutex); - ret = lookup_extent_ref(NULL, root, start, len, refs); - BUG_ON(ret); - mutex_unlock(&root->fs_info->alloc_mutex); - - if (eb) { - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - } - if (*refs == 1) { - printk(KERN_ERR "btrfs block %llu went down to one " - "during drop_snap\n", (unsigned long long)start); - } - - } -#endif - - cond_resched(); - return ret; -} +struct walk_control { + u64 refs[BTRFS_MAX_LEVEL]; + u64 flags[BTRFS_MAX_LEVEL]; + struct btrfs_key update_progress; + int stage; + int level; + int shared_level; + int update_ref; + int keep_locks; + int reada_slot; + int reada_count; +}; +#define DROP_REFERENCE 1 +#define UPDATE_BACKREF 2 -/* - * this is used while deleting old snapshots, and it drops the refs - * on a whole subtree starting from a level 1 node. - * - * The idea is to sort all the leaf pointers, and then drop the - * ref on all the leaves in order. Most of the time the leaves - * will have ref cache entries, so no leaf IOs will be required to - * find the extents they have references on. - * - * For each leaf, any references it has are also dropped in order - * - * This ends up dropping the references in something close to optimal - * order for reading and modifying the extent allocation tree. - */ -static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) +static noinline void reada_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct walk_control *wc, + struct btrfs_path *path) { u64 bytenr; - u64 root_owner; - u64 root_gen; - struct extent_buffer *eb = path->nodes[1]; - struct extent_buffer *leaf; - struct btrfs_leaf_ref *ref; - struct refsort *sorted = NULL; - int nritems = btrfs_header_nritems(eb); + u64 generation; + u64 refs; + u64 last = 0; + u32 nritems; + u32 blocksize; + struct btrfs_key key; + struct extent_buffer *eb; int ret; - int i; - int refi = 0; - int slot = path->slots[1]; - u32 blocksize = btrfs_level_size(root, 0); - u32 refs; - - if (nritems == 0) - goto out; - - root_owner = btrfs_header_owner(eb); - root_gen = btrfs_header_generation(eb); - sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); + int slot; + int nread = 0; - /* - * step one, sort all the leaf pointers so we don't scribble - * randomly into the extent allocation tree - */ - for (i = slot; i < nritems; i++) { - sorted[refi].bytenr = btrfs_node_blockptr(eb, i); - sorted[refi].slot = i; - refi++; + if (path->slots[wc->level] < wc->reada_slot) { + wc->reada_count = wc->reada_count * 2 / 3; + wc->reada_count = max(wc->reada_count, 2); + } else { + wc->reada_count = wc->reada_count * 3 / 2; + wc->reada_count = min_t(int, wc->reada_count, + BTRFS_NODEPTRS_PER_BLOCK(root)); } - /* - * nritems won't be zero, but if we're picking up drop_snapshot - * after a crash, slot might be > 0, so double check things - * just in case. - */ - if (refi == 0) - goto out; + eb = path->nodes[wc->level]; + nritems = btrfs_header_nritems(eb); + blocksize = btrfs_level_size(root, wc->level - 1); - sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); + for (slot = path->slots[wc->level]; slot < nritems; slot++) { + if (nread >= wc->reada_count) + break; - /* - * the first loop frees everything the leaves point to - */ - for (i = 0; i < refi; i++) { - u64 ptr_gen; + cond_resched(); + bytenr = btrfs_node_blockptr(eb, slot); + generation = btrfs_node_ptr_generation(eb, slot); - bytenr = sorted[i].bytenr; + if (slot == path->slots[wc->level]) + goto reada; - /* - * check the reference count on this leaf. If it is > 1 - * we just decrement it below and don't update any - * of the refs the leaf points to. - */ - ret = drop_snap_lookup_refcount(trans, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - if (refs != 1) + if (wc->stage == UPDATE_BACKREF && + generation <= root->root_key.offset) continue; - ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); - - /* - * the leaf only had one reference, which means the - * only thing pointing to this leaf is the snapshot - * we're deleting. It isn't possible for the reference - * count to increase again later - * - * The reference cache is checked for the leaf, - * and if found we'll be able to drop any refs held by - * the leaf without needing to read it in. - */ - ref = btrfs_lookup_leaf_ref(root, bytenr); - if (ref && ref->generation != ptr_gen) { - btrfs_free_leaf_ref(root, ref); - ref = NULL; - } - if (ref) { - ret = cache_drop_leaf_ref(trans, root, ref); - BUG_ON(ret); - btrfs_remove_leaf_ref(root, ref); - btrfs_free_leaf_ref(root, ref); - } else { - /* - * the leaf wasn't in the reference cache, so - * we have to read it. - */ - leaf = read_tree_block(root, bytenr, blocksize, - ptr_gen); - ret = btrfs_drop_leaf_ref(trans, root, leaf); + if (wc->stage == DROP_REFERENCE) { + ret = btrfs_lookup_extent_info(trans, root, + bytenr, blocksize, + &refs, NULL); BUG_ON(ret); - free_extent_buffer(leaf); - } - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } - - /* - * run through the loop again to free the refs on the leaves. - * This is faster than doing it in the loop above because - * the leaves are likely to be clustered together. We end up - * working in nice chunks on the extent allocation tree. - */ - for (i = 0; i < refi; i++) { - bytenr = sorted[i].bytenr; - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, eb->start, - root_owner, root_gen, 0, 1); - BUG_ON(ret); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - } -out: - kfree(sorted); - - /* - * update the path to show we've processed the entire level 1 - * node. This will get saved into the root's drop_snapshot_progress - * field so these drops are not repeated again if this transaction - * commits. - */ - path->slots[1] = nritems; - return 0; -} - -/* - * helper function for drop_snapshot, this walks down the tree dropping ref - * counts as it goes. - */ -static noinline int walk_down_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, int *level) -{ - u64 root_owner; - u64 root_gen; - u64 bytenr; - u64 ptr_gen; - struct extent_buffer *next; - struct extent_buffer *cur; - struct extent_buffer *parent; - u32 blocksize; - int ret; - u32 refs; - - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, - path->nodes[*level]->len, &refs); - BUG_ON(ret); - if (refs > 1) - goto out; - - /* - * walk down to the last node level and free all the leaves - */ - while (*level >= 0) { - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - cur = path->nodes[*level]; - - if (btrfs_header_level(cur) != *level) - WARN_ON(1); + BUG_ON(refs == 0); + if (refs == 1) + goto reada; - if (path->slots[*level] >= - btrfs_header_nritems(cur)) - break; - - /* the new code goes down to level 1 and does all the - * leaves pointed to that node in bulk. So, this check - * for level 0 will always be false. - * - * But, the disk format allows the drop_snapshot_progress - * field in the root to leave things in a state where - * a leaf will need cleaning up here. If someone crashes - * with the old code and then boots with the new code, - * we might find a leaf here. - */ - if (*level == 0) { - ret = btrfs_drop_leaf_ref(trans, root, cur); - BUG_ON(ret); - break; + if (!wc->update_ref || + generation <= root->root_key.offset) + continue; + btrfs_node_key_to_cpu(eb, &key, slot); + ret = btrfs_comp_cpu_keys(&key, + &wc->update_progress); + if (ret < 0) + continue; } - - /* - * once we get to level one, process the whole node - * at once, including everything below it. - */ - if (*level == 1) { - ret = drop_level_one_refs(trans, root, path); - BUG_ON(ret); +reada: + ret = readahead_tree_block(root, bytenr, blocksize, + generation); + if (ret) break; - } - - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - - ret = drop_snap_lookup_refcount(trans, root, bytenr, - blocksize, &refs); - BUG_ON(ret); - - /* - * if there is more than one reference, we don't need - * to read that node to drop any references it has. We - * just drop the ref we hold on that node and move on to the - * next slot in this level. - */ - if (refs != 1) { - parent = path->nodes[*level]; - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - path->slots[*level]++; - - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - root_owner, root_gen, - *level - 1, 1); - BUG_ON(ret); - - atomic_inc(&root->fs_info->throttle_gen); - wake_up(&root->fs_info->transaction_throttle); - cond_resched(); - - continue; - } - - /* - * we need to keep freeing things in the next level down. - * read the block and loop around to process it - */ - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - WARN_ON(*level <= 0); - if (path->nodes[*level-1]) - free_extent_buffer(path->nodes[*level-1]); - path->nodes[*level-1] = next; - *level = btrfs_header_level(next); - path->slots[*level] = 0; - cond_resched(); + last = bytenr + blocksize; + nread++; } -out: - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - - if (path->nodes[*level] == root->node) { - parent = path->nodes[*level]; - bytenr = path->nodes[*level]->start; - } else { - parent = path->nodes[*level + 1]; - bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); - } - - blocksize = btrfs_level_size(root, *level); - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - - /* - * cleanup and free the reference on the last node - * we processed - */ - ret = btrfs_free_extent(trans, root, bytenr, blocksize, - parent->start, root_owner, root_gen, - *level, 1); - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - - *level += 1; - BUG_ON(ret); - - cond_resched(); - return 0; + wc->reada_slot = slot; } -#endif - -struct walk_control { - u64 refs[BTRFS_MAX_LEVEL]; - u64 flags[BTRFS_MAX_LEVEL]; - struct btrfs_key update_progress; - int stage; - int level; - int shared_level; - int update_ref; - int keep_locks; -}; - -#define DROP_REFERENCE 1 -#define UPDATE_BACKREF 2 /* * hepler to process tree block while walking down the tree. * - * when wc->stage == DROP_REFERENCE, this function checks - * reference count of the block. if the block is shared and - * we need update back refs for the subtree rooted at the - * block, this function changes wc->stage to UPDATE_BACKREF - * * when wc->stage == UPDATE_BACKREF, this function updates * back refs for pointers in the block. * @@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, { int level = wc->level; struct extent_buffer *eb = path->nodes[level]; - struct btrfs_key key; u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; int ret; @@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, BUG_ON(wc->refs[level] == 0); } - if (wc->stage == DROP_REFERENCE && - wc->update_ref && wc->refs[level] > 1) { - BUG_ON(eb == root->node); - BUG_ON(path->slots[level] > 0); - if (level == 0) - btrfs_item_key_to_cpu(eb, &key, path->slots[level]); - else - btrfs_node_key_to_cpu(eb, &key, path->slots[level]); - if (btrfs_header_owner(eb) == root->root_key.objectid && - btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { - wc->stage = UPDATE_BACKREF; - wc->shared_level = level; - } - } - if (wc->stage == DROP_REFERENCE) { if (wc->refs[level] > 1) return 1; @@ -4879,6 +4645,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, } /* + * hepler to process tree block pointer. + * + * when wc->stage == DROP_REFERENCE, this function checks + * reference count of the block pointed to. if the block + * is shared and we need update back refs for the subtree + * rooted at the block, this function changes wc->stage to + * UPDATE_BACKREF. if the block is shared and there is no + * need to update back, this function drops the reference + * to the block. + * + * NOTE: return value 1 means we should stop walking down. + */ +static noinline int do_walk_down(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + u64 bytenr; + u64 generation; + u64 parent; + u32 blocksize; + struct btrfs_key key; + struct extent_buffer *next; + int level = wc->level; + int reada = 0; + int ret = 0; + + generation = btrfs_node_ptr_generation(path->nodes[level], + path->slots[level]); + /* + * if the lower level block was created before the snapshot + * was created, we know there is no need to update back refs + * for the subtree + */ + if (wc->stage == UPDATE_BACKREF && + generation <= root->root_key.offset) + return 1; + + bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); + blocksize = btrfs_level_size(root, level - 1); + + next = btrfs_find_tree_block(root, bytenr, blocksize); + if (!next) { + next = btrfs_find_create_tree_block(root, bytenr, blocksize); + reada = 1; + } + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + + if (wc->stage == DROP_REFERENCE) { + ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, + &wc->refs[level - 1], + &wc->flags[level - 1]); + BUG_ON(ret); + BUG_ON(wc->refs[level - 1] == 0); + + if (wc->refs[level - 1] > 1) { + if (!wc->update_ref || + generation <= root->root_key.offset) + goto skip; + + btrfs_node_key_to_cpu(path->nodes[level], &key, + path->slots[level]); + ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); + if (ret < 0) + goto skip; + + wc->stage = UPDATE_BACKREF; + wc->shared_level = level - 1; + } + } + + if (!btrfs_buffer_uptodate(next, generation)) { + btrfs_tree_unlock(next); + free_extent_buffer(next); + next = NULL; + } + + if (!next) { + if (reada && level == 1) + reada_walk_down(trans, root, wc, path); + next = read_tree_block(root, bytenr, blocksize, generation); + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + } + + level--; + BUG_ON(level != btrfs_header_level(next)); + path->nodes[level] = next; + path->slots[level] = 0; + path->locks[level] = 1; + wc->level = level; + if (wc->level == 1) + wc->reada_slot = 0; + return 0; +skip: + wc->refs[level - 1] = 0; + wc->flags[level - 1] = 0; + + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + parent = path->nodes[level]->start; + } else { + BUG_ON(root->root_key.objectid != + btrfs_header_owner(path->nodes[level])); + parent = 0; + } + + ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, + root->root_key.objectid, level - 1, 0); + BUG_ON(ret); + + btrfs_tree_unlock(next); + free_extent_buffer(next); + return 1; +} + +/* * hepler to process tree block while walking up the tree. * * when wc->stage == DROP_REFERENCE, this function drops @@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, if (level < wc->shared_level) goto out; - BUG_ON(wc->refs[level] <= 1); ret = find_next_key(path, level + 1, &wc->update_progress); if (ret > 0) wc->update_ref = 0; @@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, path->locks[level] = 0; return 1; } - } else { - BUG_ON(level != 0); } } @@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_path *path, struct walk_control *wc) { - struct extent_buffer *next; - struct extent_buffer *cur; - u64 bytenr; - u64 ptr_gen; - u32 blocksize; int level = wc->level; int ret; while (level >= 0) { - cur = path->nodes[level]; - BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); + if (path->slots[level] >= + btrfs_header_nritems(path->nodes[level])) + break; ret = walk_down_proc(trans, root, path, wc); if (ret > 0) @@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (level == 0) break; - bytenr = btrfs_node_blockptr(cur, path->slots[level]); - blocksize = btrfs_level_size(root, level - 1); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); - - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - - level--; - BUG_ON(level != btrfs_header_level(next)); - path->nodes[level] = next; - path->slots[level] = 0; - path->locks[level] = 1; - wc->level = level; + ret = do_walk_down(trans, root, path, wc); + if (ret > 0) { + path->slots[level]++; + continue; + } + level = wc->level; } return 0; } @@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) err = ret; goto out; } - btrfs_node_key_to_cpu(path->nodes[level], &key, - path->slots[level]); - WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); + WARN_ON(ret > 0); /* * unlock our path, this is safe because only this @@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) wc->stage = DROP_REFERENCE; wc->update_ref = update_ref; wc->keep_locks = 0; + wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { ret = walk_down_tree(trans, root, path, wc); @@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) ret = btrfs_del_root(trans, tree_root, &root->root_key); BUG_ON(ret); - free_extent_buffer(root->node); - free_extent_buffer(root->commit_root); - kfree(root); + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_find_last_root(tree_root, root->root_key.objectid, + NULL, NULL); + BUG_ON(ret < 0); + if (ret > 0) { + ret = btrfs_del_orphan_item(trans, tree_root, + root->root_key.objectid); + BUG_ON(ret); + } + } + + if (root->in_radix) { + btrfs_free_fs_root(tree_root->fs_info, root); + } else { + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + kfree(root); + } out: btrfs_end_transaction(trans, tree_root); kfree(wc); @@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, wc->stage = DROP_REFERENCE; wc->update_ref = 0; wc->keep_locks = 1; + wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { wret = walk_down_tree(trans, root, path, wc); @@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); while (1) { int ret; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, return 0; } -#if 0 -static int __insert_orphan_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 size) -{ - struct btrfs_path *path; - struct btrfs_inode_item *item; - struct extent_buffer *leaf; - int ret; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - path->leave_spinning = 1; - ret = btrfs_insert_empty_inode(trans, root, path, objectid); - if (ret) - goto out; - - leaf = path->nodes[0]; - item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); - btrfs_set_inode_generation(leaf, item, 1); - btrfs_set_inode_size(leaf, item, size); - btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); - btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); -out: - btrfs_free_path(path); - return ret; -} - -static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *group) +/* + * checks to see if its even possible to relocate this block group. + * + * @return - -1 if it's not a good idea to relocate this block group, 0 if its + * ok to go ahead and try. + */ +int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) { - struct inode *inode = NULL; - struct btrfs_trans_handle *trans; - struct btrfs_root *root; - struct btrfs_key root_key; - u64 objectid = BTRFS_FIRST_FREE_OBJECTID; - int err = 0; + struct btrfs_block_group_cache *block_group; + struct btrfs_space_info *space_info; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + struct btrfs_device *device; + int full = 0; + int ret = 0; - root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; - root_key.type = BTRFS_ROOT_ITEM_KEY; - root_key.offset = (u64)-1; - root = btrfs_read_fs_root_no_name(fs_info, &root_key); - if (IS_ERR(root)) - return ERR_CAST(root); + block_group = btrfs_lookup_block_group(root->fs_info, bytenr); - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); + /* odd, couldn't find the block group, leave it alone */ + if (!block_group) + return -1; - err = btrfs_find_free_objectid(trans, root, objectid, &objectid); - if (err) + /* no bytes used, we're good */ + if (!btrfs_block_group_used(&block_group->item)) goto out; - err = __insert_orphan_inode(trans, root, objectid, group->key.offset); - BUG_ON(err); - - err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, - group->key.offset, 0, group->key.offset, - 0, 0, 0); - BUG_ON(err); - - inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); - if (inode->i_state & I_NEW) { - BTRFS_I(inode)->root = root; - BTRFS_I(inode)->location.objectid = objectid; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.offset = 0; - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - BUG_ON(is_bad_inode(inode)); - } else { - BUG_ON(1); - } - BTRFS_I(inode)->index_cnt = group->key.objectid; - - err = btrfs_orphan_add(trans, inode); -out: - btrfs_end_transaction(trans, root); - if (err) { - if (inode) - iput(inode); - inode = ERR_PTR(err); - } - return inode; -} - -int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) -{ - - struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; - struct btrfs_ordered_extent *ordered; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct list_head list; - size_t offset; - int ret; - u64 disk_bytenr; - - INIT_LIST_HEAD(&list); - - ordered = btrfs_lookup_ordered_extent(inode, file_pos); - BUG_ON(ordered->file_offset != file_pos || ordered->len != len); - - disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; - ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, - disk_bytenr + len - 1, &list); - - while (!list_empty(&list)) { - sums = list_entry(list.next, struct btrfs_ordered_sum, list); - list_del_init(&sums->list); - - sector_sum = sums->sums; - sums->bytenr = ordered->start; + space_info = block_group->space_info; + spin_lock(&space_info->lock); - offset = 0; - while (offset < sums->len) { - sector_sum->bytenr += ordered->start - disk_bytenr; - sector_sum++; - offset += root->sectorsize; - } + full = space_info->full; - btrfs_add_ordered_sum(inode, ordered, sums); + /* + * if this is the last block group we have in this space, we can't + * relocate it unless we're able to allocate a new chunk below. + * + * Otherwise, we need to make sure we have room in the space to handle + * all of the extents from this block group. If we can, we're good + */ + if ((space_info->total_bytes != block_group->key.offset) && + (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + btrfs_block_group_used(&block_group->item) < + space_info->total_bytes)) { + spin_unlock(&space_info->lock); + goto out; } - btrfs_put_ordered_extent(ordered); - return 0; -} - -int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) -{ - struct btrfs_trans_handle *trans; - struct btrfs_path *path; - struct btrfs_fs_info *info = root->fs_info; - struct extent_buffer *leaf; - struct inode *reloc_inode; - struct btrfs_block_group_cache *block_group; - struct btrfs_key key; - u64 skipped; - u64 cur_byte; - u64 total_found; - u32 nritems; - int ret; - int progress; - int pass = 0; - - root = root->fs_info->extent_root; - - block_group = btrfs_lookup_block_group(info, group_start); - BUG_ON(!block_group); - - printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", - (unsigned long long)block_group->key.objectid, - (unsigned long long)block_group->flags); - - path = btrfs_alloc_path(); - BUG_ON(!path); - - reloc_inode = create_reloc_inode(info, block_group); - BUG_ON(IS_ERR(reloc_inode)); - - __alloc_chunk_for_shrink(root, block_group, 1); - set_block_group_readonly(block_group); - - btrfs_start_delalloc_inodes(info->tree_root); - btrfs_wait_ordered_extents(info->tree_root, 0); -again: - skipped = 0; - total_found = 0; - progress = 0; - key.objectid = block_group->key.objectid; - key.offset = 0; - key.type = 0; - cur_byte = key.objectid; - - trans = btrfs_start_transaction(info->tree_root, 1); - btrfs_commit_transaction(trans, info->tree_root); + spin_unlock(&space_info->lock); - mutex_lock(&root->fs_info->cleaner_mutex); - btrfs_clean_old_snapshots(info->tree_root); - btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); - mutex_unlock(&root->fs_info->cleaner_mutex); + /* + * ok we don't have enough space, but maybe we have free space on our + * devices to allocate new chunks for relocation, so loop through our + * alloc devices and guess if we have enough space. However, if we + * were marked as full, then we know there aren't enough chunks, and we + * can just return. + */ + ret = -1; + if (full) + goto out; - trans = btrfs_start_transaction(info->tree_root, 1); - btrfs_commit_transaction(trans, info->tree_root); + mutex_lock(&root->fs_info->chunk_mutex); + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { + u64 min_free = btrfs_block_group_used(&block_group->item); + u64 dev_offset, max_avail; - while (1) { - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto out; -next: - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret < 0) - goto out; - if (ret == 1) { - ret = 0; + /* + * check to make sure we can actually find a chunk with enough + * space to fit our block group in. + */ + if (device->total_bytes > device->bytes_used + min_free) { + ret = find_free_dev_extent(NULL, device, min_free, + &dev_offset, &max_avail); + if (!ret) break; - } - leaf = path->nodes[0]; - nritems = btrfs_header_nritems(leaf); + ret = -1; } - - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (key.objectid >= block_group->key.objectid + - block_group->key.offset) - break; - - if (progress && need_resched()) { - btrfs_release_path(root, path); - cond_resched(); - progress = 0; - continue; - } - progress = 1; - - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || - key.objectid + key.offset <= cur_byte) { - path->slots[0]++; - goto next; - } - - total_found++; - cur_byte = key.objectid + key.offset; - btrfs_release_path(root, path); - - __alloc_chunk_for_shrink(root, block_group, 0); - ret = relocate_one_extent(root, path, &key, block_group, - reloc_inode, pass); - BUG_ON(ret < 0); - if (ret > 0) - skipped++; - - key.objectid = cur_byte; - key.type = 0; - key.offset = 0; - } - - btrfs_release_path(root, path); - - if (pass == 0) { - btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); - invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); - } - - if (total_found > 0) { - printk(KERN_INFO "btrfs found %llu extents in pass %d\n", - (unsigned long long)total_found, pass); - pass++; - if (total_found == skipped && pass > 2) { - iput(reloc_inode); - reloc_inode = create_reloc_inode(info, block_group); - pass = 0; - } - goto again; } - - /* delete reloc_inode */ - iput(reloc_inode); - - /* unpin extents in this range */ - trans = btrfs_start_transaction(info->tree_root, 1); - btrfs_commit_transaction(trans, info->tree_root); - - spin_lock(&block_group->lock); - WARN_ON(block_group->pinned > 0); - WARN_ON(block_group->reserved > 0); - WARN_ON(btrfs_block_group_used(&block_group->item) > 0); - spin_unlock(&block_group->lock); - btrfs_put_block_group(block_group); - ret = 0; + mutex_unlock(&root->fs_info->chunk_mutex); out: - btrfs_free_path(path); + btrfs_put_block_group(block_group); return ret; } -#endif static int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key) @@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { struct btrfs_block_group_cache *block_group; struct btrfs_space_info *space_info; + struct btrfs_caching_control *caching_ctl; struct rb_node *n; + down_write(&info->extent_commit_sem); + while (!list_empty(&info->caching_block_groups)) { + caching_ctl = list_entry(info->caching_block_groups.next, + struct btrfs_caching_control, list); + list_del(&caching_ctl->list); + put_caching_control(caching_ctl); + } + up_write(&info->extent_commit_sem); + spin_lock(&info->block_group_cache_lock); while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { block_group = rb_entry(n, struct btrfs_block_group_cache, @@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) up_write(&block_group->space_info->groups_sem); if (block_group->cached == BTRFS_CACHE_STARTED) - wait_event(block_group->caching_q, - block_group_cache_done(block_group)); + wait_block_group_cache_done(block_group); btrfs_remove_free_space_cache(block_group); @@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) spin_lock_init(&cache->lock); spin_lock_init(&cache->tree_lock); cache->fs_info = info; - init_waitqueue_head(&cache->caching_q); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); @@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->flags = btrfs_block_group_flags(&cache->item); cache->sectorsize = root->sectorsize; - remove_sb_from_cache(root, cache); - /* * check for two cases, either we are full, and therefore * don't need to bother with the caching work since we won't @@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) * time, particularly in the full case. */ if (found_key.offset == btrfs_block_group_used(&cache->item)) { + exclude_super_stripes(root, cache); + cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; + free_excluded_extents(root, cache); } else if (btrfs_block_group_used(&cache->item) == 0) { + exclude_super_stripes(root, cache); + cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; add_new_free_space(cache, root->fs_info, found_key.objectid, found_key.objectid + found_key.offset); + free_excluded_extents(root, cache); } ret = update_space_info(info, cache->flags, found_key.offset, @@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) &space_info); BUG_ON(ret); cache->space_info = space_info; + spin_lock(&cache->space_info->lock); + cache->space_info->bytes_super += cache->bytes_super; + spin_unlock(&cache->space_info->lock); + down_write(&space_info->groups_sem); list_add_tail(&cache->list, &space_info->block_groups); up_write(&space_info->groups_sem); @@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); spin_lock_init(&cache->tree_lock); - init_waitqueue_head(&cache->caching_q); INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); @@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->flags = type; btrfs_set_block_group_flags(&cache->item, type); + cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; - remove_sb_from_cache(root, cache); + exclude_super_stripes(root, cache); add_new_free_space(cache, root->fs_info, chunk_offset, chunk_offset + size); + free_excluded_extents(root, cache); + ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); + + spin_lock(&cache->space_info->lock); + cache->space_info->bytes_super += cache->bytes_super; + spin_unlock(&cache->space_info->lock); + down_write(&cache->space_info->groups_sem); list_add_tail(&cache->list, &cache->space_info->block_groups); up_write(&cache->space_info->groups_sem); @@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, up_write(&block_group->space_info->groups_sem); if (block_group->cached == BTRFS_CACHE_STARTED) - wait_event(block_group->caching_q, - block_group_cache_done(block_group)); + wait_block_group_cache_done(block_group); btrfs_remove_free_space_cache(block_group); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68260180f58..0cb88f8146e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree, } if (bits & EXTENT_DIRTY) tree->dirty_bytes += end - start + 1; - set_state_cb(tree, state, bits); - state->state |= bits; state->start = start; state->end = end; + set_state_cb(tree, state, bits); + state->state |= bits; node = tree_insert(&tree->state, end, &state->rb_node); if (node) { struct extent_state *found; @@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree, * bits were already set, or zero if none of the bits were already set. */ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask) + int bits, int wake, int delete, + struct extent_state **cached_state, + gfp_t mask) { struct extent_state *state; + struct extent_state *cached; struct extent_state *prealloc = NULL; + struct rb_node *next_node; struct rb_node *node; u64 last_end; int err; @@ -488,6 +492,17 @@ again: } spin_lock(&tree->lock); + if (cached_state) { + cached = *cached_state; + *cached_state = NULL; + cached_state = NULL; + if (cached && cached->tree && cached->start == start) { + atomic_dec(&cached->refs); + state = cached; + goto hit_next; + } + free_extent_state(cached); + } /* * this search will find the extents that end after * our range starts @@ -496,6 +511,7 @@ again: if (!node) goto out; state = rb_entry(node, struct extent_state, rb_node); +hit_next: if (state->start > end) goto out; WARN_ON(state->end < start); @@ -531,8 +547,6 @@ again: if (last_end == (u64)-1) goto out; start = last_end + 1; - } else { - start = state->start; } goto search_again; } @@ -550,16 +564,28 @@ again: if (wake) wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, wake, delete); prealloc = NULL; goto out; } + if (state->end < end && prealloc && !need_resched()) + next_node = rb_next(&state->rb_node); + else + next_node = NULL; + set |= clear_state_bit(tree, state, bits, wake, delete); if (last_end == (u64)-1) goto out; start = last_end + 1; + if (start <= end && next_node) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } goto search_again; out: @@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree, state->state |= bits; } +static void cache_state(struct extent_state *state, + struct extent_state **cached_ptr) +{ + if (cached_ptr && !(*cached_ptr)) { + if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { + *cached_ptr = state; + atomic_inc(&state->refs); + } + } +} + /* - * set some bits on a range in the tree. This may require allocations - * or sleeping, so the gfp mask is used to indicate what is allowed. + * set some bits on a range in the tree. This may require allocations or + * sleeping, so the gfp mask is used to indicate what is allowed. * - * If 'exclusive' == 1, this will fail with -EEXIST if some part of the - * range already has the desired bits set. The start of the existing - * range is returned in failed_start in this case. + * If any of the exclusive bits are set, this will fail with -EEXIST if some + * part of the range already has the desired bits set. The start of the + * existing range is returned in failed_start in this case. * - * [start, end] is inclusive - * This takes the tree lock. + * [start, end] is inclusive This takes the tree lock. */ + static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int exclusive, u64 *failed_start, + int bits, int exclusive_bits, u64 *failed_start, + struct extent_state **cached_state, gfp_t mask) { struct extent_state *state; struct extent_state *prealloc = NULL; struct rb_node *node; int err = 0; - int set; u64 last_start; u64 last_end; + again: if (!prealloc && (mask & __GFP_WAIT)) { prealloc = alloc_extent_state(mask); @@ -683,6 +721,13 @@ again: } spin_lock(&tree->lock); + if (cached_state && *cached_state) { + state = *cached_state; + if (state->start == start && state->tree) { + node = &state->rb_node; + goto hit_next; + } + } /* * this search will find all the extents that end after * our range starts. @@ -694,8 +739,8 @@ again: BUG_ON(err == -EEXIST); goto out; } - state = rb_entry(node, struct extent_state, rb_node); +hit_next: last_start = state->start; last_end = state->end; @@ -706,17 +751,29 @@ again: * Just lock what we found and keep going */ if (state->start == start && state->end <= end) { - set = state->state & bits; - if (set && exclusive) { + struct rb_node *next_node; + if (state->state & exclusive_bits) { *failed_start = state->start; err = -EEXIST; goto out; } + set_state_bits(tree, state, bits); + cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; + start = last_end + 1; + if (start < end && prealloc && !need_resched()) { + next_node = rb_next(node); + if (next_node) { + state = rb_entry(next_node, struct extent_state, + rb_node); + if (state->start == start) + goto hit_next; + } + } goto search_again; } @@ -737,8 +794,7 @@ again: * desired bit on it. */ if (state->start < start) { - set = state->state & bits; - if (exclusive && set) { + if (state->state & exclusive_bits) { *failed_start = start; err = -EEXIST; goto out; @@ -750,12 +806,11 @@ again: goto out; if (state->end <= end) { set_state_bits(tree, state, bits); + cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; - } else { - start = state->start; } goto search_again; } @@ -774,6 +829,7 @@ again: this_end = last_start - 1; err = insert_state(tree, prealloc, start, this_end, bits); + cache_state(prealloc, cached_state); prealloc = NULL; BUG_ON(err == -EEXIST); if (err) @@ -788,8 +844,7 @@ again: * on the first half */ if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { + if (state->state & exclusive_bits) { *failed_start = start; err = -EEXIST; goto out; @@ -798,6 +853,7 @@ again: BUG_ON(err == -EEXIST); set_state_bits(tree, prealloc, bits); + cache_state(prealloc, cached_state); merge_state(tree, prealloc); prealloc = NULL; goto out; @@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, - mask); -} - -int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); + NULL, mask); } int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { return set_extent_bit(tree, start, end, bits, 0, NULL, - mask); + NULL, mask); } int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask) { - return clear_extent_bit(tree, start, end, bits, 0, 0, mask); + return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); } int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, - 0, NULL, mask); + EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, + 0, NULL, NULL, mask); } int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); -} - -int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, + NULL, mask); } int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, - mask); + NULL, mask); } static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, + NULL, mask); } int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - mask); + NULL, mask); } static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); -} - -static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, - 0, NULL, mask); -} - -static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, - u64 end, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, + NULL, mask); } int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) @@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) * either insert or lock state struct between start and end use mask to tell * us if waiting is desired. */ -int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, struct extent_state **cached_state, gfp_t mask) { int err; u64 failed_start; while (1) { - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); + err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, + EXTENT_LOCKED, &failed_start, + cached_state, mask); if (err == -EEXIST && (mask & __GFP_WAIT)) { wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); start = failed_start; @@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) return err; } +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +{ + return lock_extent_bits(tree, start, end, 0, NULL, mask); +} + int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { int err; u64 failed_start; - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, + &failed_start, NULL, mask); if (err == -EEXIST) { if (failed_start > start) clear_extent_bit(tree, start, failed_start - 1, - EXTENT_LOCKED, 1, 0, mask); + EXTENT_LOCKED, 1, 0, NULL, mask); return 0; } return 1; } +int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, + struct extent_state **cached, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, + mask); +} + int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, + mask); } /* @@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) page_cache_release(page); index++; } - set_extent_dirty(tree, start, end, GFP_NOFS); return 0; } @@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) page_cache_release(page); index++; } - set_extent_writeback(tree, start, end, GFP_NOFS); return 0; } @@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, u64 delalloc_start; u64 delalloc_end; u64 found; + struct extent_state *cached_state = NULL; int ret; int loops = 0; @@ -1269,6 +1317,7 @@ again: /* some of the pages are gone, lets avoid looping by * shortening the size of the delalloc range we're searching */ + free_extent_state(cached_state); if (!loops) { unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); max_bytes = PAGE_CACHE_SIZE - offset; @@ -1282,18 +1331,21 @@ again: BUG_ON(ret); /* step three, lock the state bits for the whole range */ - lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); + lock_extent_bits(tree, delalloc_start, delalloc_end, + 0, &cached_state, GFP_NOFS); /* then test to make sure it is all still delalloc */ ret = test_range_bit(tree, delalloc_start, delalloc_end, - EXTENT_DELALLOC, 1); + EXTENT_DELALLOC, 1, cached_state); if (!ret) { - unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); + unlock_extent_cached(tree, delalloc_start, delalloc_end, + &cached_state, GFP_NOFS); __unlock_for_delalloc(inode, locked_page, delalloc_start, delalloc_end); cond_resched(); goto again; } + free_extent_state(cached_state); *start = delalloc_start; *end = delalloc_end; out_failed: @@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, int clear_unlock, int clear_delalloc, int clear_dirty, int set_writeback, - int end_writeback) + int end_writeback, + int set_private2) { int ret; struct page *pages[16]; @@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode, if (clear_delalloc) clear_bits |= EXTENT_DELALLOC; - clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); - if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) + clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); + if (!(unlock_pages || clear_dirty || set_writeback || end_writeback || + set_private2)) return 0; while (nr_pages > 0) { @@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode, min_t(unsigned long, nr_pages, ARRAY_SIZE(pages)), pages); for (i = 0; i < ret; i++) { + + if (set_private2) + SetPagePrivate2(pages[i]); + if (pages[i] == locked_page) { page_cache_release(pages[i]); continue; @@ -1476,14 +1534,17 @@ out: * range is found set. */ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int filled) + int bits, int filled, struct extent_state *cached) { struct extent_state *state = NULL; struct rb_node *node; int bitset = 0; spin_lock(&tree->lock); - node = tree_search(tree, start); + if (cached && cached->tree && cached->start == start) + node = &cached->rb_node; + else + node = tree_search(tree, start); while (node && start <= end) { state = rb_entry(node, struct extent_state, rb_node); @@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, bitset = 0; break; } + + if (state->end == (u64)-1) + break; + start = state->end + 1; if (start > end) break; @@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree, { u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) SetPageUptodate(page); return 0; } @@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree, { u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) unlock_page(page); return 0; } @@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree, static int check_page_writeback(struct extent_io_tree *tree, struct page *page) { - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) - end_page_writeback(page); + end_page_writeback(page); return 0; } @@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) } if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + clear_extent_uptodate(tree, start, end, GFP_NOFS); ClearPageUptodate(page); SetPageError(page); } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); - if (whole_page) end_page_writeback(page); else @@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, continue; } /* the get_extent function already copied into the page */ - if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + if (test_range_bit(tree, cur, cur_end, + EXTENT_UPTODATE, 1, NULL)) { check_page_uptodate(tree, page); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; @@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, u64 iosize; u64 unlock_start; sector_t sector; + struct extent_state *cached_state = NULL; struct extent_map *em; struct block_device *bdev; int ret; @@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_end = 0; page_started = 0; if (!epd->extent_locked) { + u64 delalloc_to_write = 0; /* * make sure the wbc mapping index is at least updated * to this page. @@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, tree->ops->fill_delalloc(inode, page, delalloc_start, delalloc_end, &page_started, &nr_written); + /* + * delalloc_end is already one less than the total + * length, so we don't subtract one from + * PAGE_CACHE_SIZE + */ + delalloc_to_write += (delalloc_end - delalloc_start + + PAGE_CACHE_SIZE) >> + PAGE_CACHE_SHIFT; delalloc_start = delalloc_end + 1; } + if (wbc->nr_to_write < delalloc_to_write) { + int thresh = 8192; + + if (delalloc_to_write < thresh * 2) + thresh = delalloc_to_write; + wbc->nr_to_write = min_t(u64, delalloc_to_write, + thresh); + } /* did the fill delalloc function already unlock and start * the IO? @@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, goto done_unlocked; } } - lock_extent(tree, start, page_end, GFP_NOFS); - - unlock_start = start; - if (tree->ops && tree->ops->writepage_start_hook) { ret = tree->ops->writepage_start_hook(page, start, page_end); if (ret == -EAGAIN) { - unlock_extent(tree, start, page_end, GFP_NOFS); redirty_page_for_writepage(wbc, page); update_nr_written(page, wbc, nr_written); unlock_page(page); @@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, update_nr_written(page, wbc, nr_written + 1); end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) - printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); - if (last_byte <= start) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - unlock_extent(tree, start, page_end, GFP_NOFS); if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, start, page_end, NULL, 1); @@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, goto done; } - set_extent_uptodate(tree, start, page_end, GFP_NOFS); blocksize = inode->i_sb->s_blocksize; while (cur <= end) { if (cur >= last_byte) { - clear_extent_dirty(tree, cur, page_end, GFP_NOFS); - unlock_extent(tree, unlock_start, page_end, GFP_NOFS); if (tree->ops && tree->ops->writepage_end_io_hook) tree->ops->writepage_end_io_hook(page, cur, page_end, NULL, 1); @@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, */ if (compressed || block_start == EXTENT_MAP_HOLE || block_start == EXTENT_MAP_INLINE) { - clear_extent_dirty(tree, cur, - cur + iosize - 1, GFP_NOFS); - - unlock_extent(tree, unlock_start, cur + iosize - 1, - GFP_NOFS); - /* * end_io notification does not happen here for * compressed extents @@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } /* leave this out until we have a page_mkwrite call */ if (0 && !test_range_bit(tree, cur, cur + iosize - 1, - EXTENT_DIRTY, 0)) { + EXTENT_DIRTY, 0, NULL)) { cur = cur + iosize; pg_offset += iosize; continue; } - clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); if (tree->ops && tree->ops->writepage_io_hook) { ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1); @@ -2309,12 +2368,12 @@ done: set_page_writeback(page); end_page_writeback(page); } - if (unlock_start <= page_end) - unlock_extent(tree, unlock_start, page_end, GFP_NOFS); unlock_page(page); done_unlocked: + /* drop our reference on any cached states */ + free_extent_state(cached_state); return 0; } @@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, writepage_t writepage, void *data, void (*flush_fn)(void *)) { - struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; int done = 0; + int nr_to_write_done = 0; struct pagevec pvec; int nr_pages; pgoff_t index; @@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, scanned = 1; } retry: - while (!done && (index <= end) && + while (!done && !nr_to_write_done && (index <= end) && (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { @@ -2412,12 +2471,15 @@ retry: unlock_page(page); ret = 0; } - if (ret || wbc->nr_to_write <= 0) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; + if (ret) done = 1; - } + + /* + * the filesystem may choose to bump up nr_to_write. + * We have to make sure to honor the new nr_to_write + * at any time + */ + nr_to_write_done = wbc->nr_to_write <= 0; } pagevec_release(&pvec); cond_resched(); @@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree, return 0; lock_extent(tree, start, end, GFP_NOFS); - wait_on_extent_writeback(tree, start, end); + wait_on_page_writeback(page); clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, GFP_NOFS); + 1, 1, NULL, GFP_NOFS); return 0; } @@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree, !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1)) { + EXTENT_UPTODATE, 1, NULL)) { u64 sector; u64 extent_offset = block_start - em->start; size_t iosize; @@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree, */ set_extent_bit(tree, block_start, block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, GFP_NOFS); + EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); ret = submit_extent_page(READ, tree, page, sector, iosize, page_offset, em->bdev, NULL, 1, @@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map, int ret = 1; if (test_range_bit(tree, start, end, - EXTENT_IOBITS | EXTENT_ORDERED, 0)) + EXTENT_IOBITS, 0, NULL)) ret = 0; else { if ((mask & GFP_NOFS) == GFP_NOFS) mask = GFP_NOFS; - clear_extent_bit(tree, start, end, EXTENT_UPTODATE, - 1, 1, mask); + /* + * at this point we can safely clear everything except the + * locked bit and the nodatasum bit + */ + clear_extent_bit(tree, start, end, + ~(EXTENT_LOCKED | EXTENT_NODATASUM), + 0, 0, NULL, mask); } return ret; } @@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 len; while (start <= end) { len = end - start + 1; - spin_lock(&map->lock); + write_lock(&map->lock); em = lookup_extent_mapping(map, start, len); if (!em || IS_ERR(em)) { - spin_unlock(&map->lock); + write_unlock(&map->lock); break; } if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || em->start != start) { - spin_unlock(&map->lock); + write_unlock(&map->lock); free_extent_map(em); break; } if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, - EXTENT_LOCKED | EXTENT_WRITEBACK | - EXTENT_ORDERED, - 0)) { + EXTENT_LOCKED | EXTENT_WRITEBACK, + 0, NULL)) { remove_extent_mapping(map, em); /* once for the rb tree */ free_extent_map(em); } start = extent_map_end(em); - spin_unlock(&map->lock); + write_unlock(&map->lock); /* once for us */ free_extent_map(em); @@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, int uptodate; unsigned long index; - ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); + ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); if (ret) return 1; while (start <= end) { @@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, return 1; ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1); + EXTENT_UPTODATE, 1, NULL); if (ret) return ret; @@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, return 0; if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1)) { + EXTENT_UPTODATE, 1, NULL)) { return 0; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5bc20abf3f3..14ed16fd862 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -13,10 +13,8 @@ #define EXTENT_DEFRAG (1 << 6) #define EXTENT_DEFRAG_DONE (1 << 7) #define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_ORDERED (1 << 9) -#define EXTENT_ORDERED_METADATA (1 << 10) -#define EXTENT_BOUNDARY (1 << 11) -#define EXTENT_NODATASUM (1 << 12) +#define EXTENT_BOUNDARY (1 << 9) +#define EXTENT_NODATASUM (1 << 10) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* flags for bio submission */ @@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, struct extent_state **cached, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); @@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 max_bytes, unsigned long bits); int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int filled); + int bits, int filled, struct extent_state *cached_state); int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask); + int bits, int wake, int delete, struct extent_state **cached, + gfp_t mask); int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, @@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, int clear_unlock, int clear_delalloc, int clear_dirty, int set_writeback, - int end_writeback); + int end_writeback, + int set_private2); #endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 30c9365861e..2c726b7b9fa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -36,7 +36,7 @@ void extent_map_exit(void) void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - spin_lock_init(&tree->lock); + rwlock_init(&tree->lock); } /** @@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) return 0; } +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) +{ + int ret = 0; + struct extent_map *merge = NULL; + struct rb_node *rb; + struct extent_map *em; + + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, start, len); + + WARN_ON(em->start != start || !em); + + if (!em) + goto out; + + clear_bit(EXTENT_FLAG_PINNED, &em->flags); + + if (em->start != 0) { + rb = rb_prev(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_len += merge->block_len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); + } + } + + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + em->block_len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + merge->in_tree = 0; + free_extent_map(merge); + } + + free_extent_map(em); +out: + write_unlock(&tree->lock); + return ret; + +} + /** * add_extent_mapping - add new extent map to the extent tree * @tree: tree to insert new map in @@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree, ret = -EEXIST; goto out; } - assert_spin_locked(&tree->lock); rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { ret = -EEXIST; @@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, struct rb_node *next = NULL; u64 end = range_end(start, len); - assert_spin_locked(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); @@ -319,6 +367,54 @@ out: } /** + * search_extent_mapping - find a nearby extent map + * @tree: tree to lookup in + * @start: byte offset to start the search + * @len: length of the lookup range + * + * Find and return the first extent_map struct in @tree that intersects the + * [start, len] range. + * + * If one can't be found, any nearby extent may be returned + */ +struct extent_map *search_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len) +{ + struct extent_map *em; + struct rb_node *rb_node; + struct rb_node *prev = NULL; + struct rb_node *next = NULL; + + rb_node = __tree_search(&tree->map, start, &prev, &next); + if (!rb_node && prev) { + em = rb_entry(prev, struct extent_map, rb_node); + goto found; + } + if (!rb_node && next) { + em = rb_entry(next, struct extent_map, rb_node); + goto found; + } + if (!rb_node) { + em = NULL; + goto out; + } + if (IS_ERR(rb_node)) { + em = ERR_PTR(PTR_ERR(rb_node)); + goto out; + } + em = rb_entry(rb_node, struct extent_map, rb_node); + goto found; + + em = NULL; + goto out; + +found: + atomic_inc(&em->refs); +out: + return em; +} + +/** * remove_extent_mapping - removes an extent_map from the extent tree * @tree: extent tree to remove from * @em: extent map beeing removed @@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) int ret = 0; WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); - assert_spin_locked(&tree->lock); rb_erase(&em->rb_node, &tree->map); em->in_tree = 0; return ret; diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fb6eeef06bb..ab6d74b6e64 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -31,7 +31,7 @@ struct extent_map { struct extent_map_tree { struct rb_root map; - spinlock_t lock; + rwlock_t lock; }; static inline u64 extent_map_end(struct extent_map *em) @@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); int __init extent_map_init(void); void extent_map_exit(void); +int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); +struct extent_map *search_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 len); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4b833972273..571ad3c13b4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - u64 hint_byte; u64 num_bytes; u64 start_pos; u64 end_of_last_block; @@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, root->sectorsize - 1) & ~((u64)root->sectorsize - 1); end_of_last_block = start_pos + num_bytes - 1; - - lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - trans = btrfs_join_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - hint_byte = 0; - - set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); - - /* check for reserved extents on each page, we don't want - * to reset the delalloc bit on things that already have - * extents reserved. - */ btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, * at this time. */ } - err = btrfs_end_transaction(trans, root); -out_unlock: - unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, if (!split2) split2 = alloc_extent_map(GFP_NOFS); - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (!em) { - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); break; } flags = em->flags; if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { - spin_unlock(&em_tree->lock); if (em->start <= start && (!testend || em->start + em->len >= start + len)) { free_extent_map(em); + write_unlock(&em_tree->lock); break; } if (start < em->start) { @@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, start = em->start + em->len; } free_extent_map(em); + write_unlock(&em_tree->lock); continue; } compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); @@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, free_extent_map(split); split = NULL; } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); /* once for us */ free_extent_map(em); @@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 locked_end, - u64 inline_limit, u64 *hint_byte) + u64 inline_limit, u64 *hint_byte, int drop_cache) { u64 extent_end = 0; u64 search_start = start; @@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, int ret; inline_limit = 0; - btrfs_drop_extent_cache(inode, start, end - 1, 0); + if (drop_cache) + btrfs_drop_extent_cache(inode, start, end - 1, 0); path = btrfs_alloc_path(); if (!path) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5edcee3a617..5c2caad7621 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) { - u64 max_bytes, possible_bytes; + u64 max_bytes; + u64 bitmap_bytes; + u64 extent_bytes; /* * The goal is to keep the total amount of memory used per 1gb of space @@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) max_bytes = MAX_CACHE_BYTES_PER_GIG * (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); - possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + - (sizeof(struct btrfs_free_space) * - block_group->extents_thresh); + /* + * we want to account for 1 more bitmap than what we have so we can make + * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as + * we add more bitmaps. + */ + bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; - if (possible_bytes > max_bytes) { - int extent_bytes = max_bytes - - (block_group->total_bitmaps * PAGE_CACHE_SIZE); + if (bitmap_bytes >= max_bytes) { + block_group->extents_thresh = 0; + return; + } - if (extent_bytes <= 0) { - block_group->extents_thresh = 0; - return; - } + /* + * we want the extent entry threshold to always be at most 1/2 the maxw + * bytes we can have, or whatever is less than that. + */ + extent_bytes = max_bytes - bitmap_bytes; + extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); - block_group->extents_thresh = extent_bytes / - (sizeof(struct btrfs_free_space)); - } + block_group->extents_thresh = + div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); } static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, @@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, BUG_ON(block_group->total_bitmaps >= max_bitmaps); info->offset = offset_to_bitmap(block_group, offset); + info->bytes = 0; link_free_space(block_group, info); block_group->total_bitmaps++; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6b627c61180..72ce3c173d6 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, ptr = (unsigned long)(ref + 1); ret = 0; } else if (ret < 0) { + if (ret == -EOVERFLOW) + ret = -EMLINK; goto out; } else { ref = btrfs_item_ptr(path->nodes[0], path->slots[0], @@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(struct btrfs_inode_item)); - if (ret == 0 && objectid > root->highest_inode) - root->highest_inode = objectid; return ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 9abbced1123..c56eb590917 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) slot = path->slots[0] - 1; l = path->nodes[0]; btrfs_item_key_to_cpu(l, &found_key, slot); - *objectid = found_key.objectid; + *objectid = max_t(u64, found_key.objectid, + BTRFS_FIRST_FREE_OBJECTID - 1); } else { - *objectid = BTRFS_FIRST_FREE_OBJECTID; + *objectid = BTRFS_FIRST_FREE_OBJECTID - 1; } ret = 0; error: @@ -53,91 +54,27 @@ error: return ret; } -/* - * walks the btree of allocated inodes and find a hole. - */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 dirid, u64 *objectid) { - struct btrfs_path *path; - struct btrfs_key key; int ret; - int slot = 0; - u64 last_ino = 0; - int start_found; - struct extent_buffer *l; - struct btrfs_key search_key; - u64 search_start = dirid; - mutex_lock(&root->objectid_mutex); - if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && - root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { - *objectid = ++root->last_inode_alloc; - mutex_unlock(&root->objectid_mutex); - return 0; - } - path = btrfs_alloc_path(); - BUG_ON(!path); - search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); - search_key.objectid = search_start; - search_key.type = 0; - search_key.offset = 0; - - start_found = 0; - ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); - if (ret < 0) - goto error; - while (1) { - l = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(l)) { - ret = btrfs_next_leaf(root, path); - if (ret == 0) - continue; - if (ret < 0) - goto error; - if (!start_found) { - *objectid = search_start; - start_found = 1; - goto found; - } - *objectid = last_ino > search_start ? - last_ino : search_start; - goto found; - } - btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid >= search_start) { - if (start_found) { - if (last_ino < search_start) - last_ino = search_start; - if (key.objectid > last_ino) { - *objectid = last_ino; - goto found; - } - } else if (key.objectid > search_start) { - *objectid = search_start; - goto found; - } - } - if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) - break; + if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { + ret = btrfs_find_highest_inode(root, &root->highest_objectid); + if (ret) + goto out; + } - start_found = 1; - last_ino = key.objectid + 1; - path->slots[0]++; + if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { + ret = -ENOSPC; + goto out; } - BUG_ON(1); -found: - btrfs_release_path(root, path); - btrfs_free_path(path); - BUG_ON(*objectid < search_start); - mutex_unlock(&root->objectid_mutex); - return 0; -error: - btrfs_release_path(root, path); - btrfs_free_path(path); + + *objectid = ++root->highest_objectid; + ret = 0; +out: mutex_unlock(&root->objectid_mutex); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9096fd0ca3c..e9b76bcd1c1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, } ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, aligned_end, start, &hint_byte); + aligned_end, aligned_end, start, + &hint_byte, 1); BUG_ON(ret); if (isize > actual_end) @@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, inline_len, compressed_size, compressed_pages); BUG_ON(ret); - btrfs_drop_extent_cache(inode, start, aligned_end, 0); + btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; } @@ -425,7 +426,7 @@ again: extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, end, NULL, 1, 0, - 0, 1, 1, 1); + 0, 1, 1, 1, 0); ret = 0; goto free_pages_out; } @@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode, set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, - NULL, 1, 1, 0, 1, 1, 0); + NULL, 1, 1, 0, 1, 1, 0, 0); ret = btrfs_submit_compressed_write(inode, async_extent->start, @@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode, extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, end, NULL, 1, 1, - 1, 1, 1, 1); + 1, 1, 1, 1, 0); *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; @@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(disk_num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + + read_lock(&BTRFS_I(inode)->extent_tree.lock); + em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, + start, num_bytes); + if (em) { + alloc_hint = em->block_start; + free_extent_map(em); + } + read_unlock(&BTRFS_I(inode)->extent_tree.lock); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); while (disk_num_bytes > 0) { @@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode, em = alloc_extent_map(GFP_NOFS); em->start = start; em->orig_start = em->start; - ram_size = ins.offset; em->len = ins.offset; @@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode, /* we're not doing compressed IO, don't unlock the first * page (which the caller expects to stay locked), don't * clear any dirty bits and don't set any writeback bits + * + * Do set the Private2 bit so we know this page was properly + * setup for writepage */ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, start + ram_size - 1, locked_page, unlock, 1, - 1, 0, 0, 0); + 1, 0, 0, 0, 1); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, int limit = 10 * 1024 * 1042; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | - EXTENT_DELALLOC, 1, 0, GFP_NOFS); + EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); async_cow->inode = inode; @@ -1080,9 +1092,9 @@ out_check: em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -1101,7 +1113,7 @@ out_check: extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, cur_offset, cur_offset + num_bytes - 1, - locked_page, 1, 1, 1, 0, 0, 0); + locked_page, 1, 1, 1, 0, 0, 0, 1); cur_offset = extent_end; if (cur_offset > end) break; @@ -1374,10 +1386,8 @@ again: lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); /* already ordered? We're done */ - if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, - EXTENT_ORDERED, 0)) { + if (PagePrivate2(page)) goto out; - } ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { @@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) struct inode *inode = page->mapping->host; struct btrfs_writepage_fixup *fixup; struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, - EXTENT_ORDERED, 0); - if (ret) + /* this page is properly in the ordered list */ + if (TestClearPagePrivate2(page)) return 0; if (PageChecked(page)) @@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, BUG_ON(!path); path->leave_spinning = 1; + + /* + * we may be replacing one extent in the tree with another. + * The new extent is pinned in the extent map, and we don't want + * to drop it from the cache until it is completely in the btree. + * + * So, tell btrfs_drop_extents to leave this extent in the cache. + * the caller is expected to unpin it and allow it to be merged + * with the others. + */ ret = btrfs_drop_extents(trans, root, inode, file_pos, file_pos + num_bytes, locked_end, - file_pos, &hint); + file_pos, &hint, 0); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); inode_add_bytes(inode, num_bytes); - btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); ins.objectid = disk_bytenr; ins.offset = disk_num_bytes; @@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); + unpin_extent_cache(&BTRFS_I(inode)->extent_tree, + ordered_extent->file_offset, + ordered_extent->len); BUG_ON(ret); } unlock_extent(io_tree, ordered_extent->file_offset, @@ -1623,6 +1643,7 @@ nocow: static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { + ClearPagePrivate2(page); return btrfs_finish_ordered_io(page->mapping->host, start, end); } @@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, failrec->last_mirror = 0; failrec->bio_flags = 0; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, failrec->len); if (em->start > start || em->start + em->len < start) { free_extent_map(em); em = NULL; } - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em || IS_ERR(em)) { kfree(failrec); @@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && - test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { + test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, GFP_NOFS); return 0; @@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) return ret; } +int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, u64 objectid, + const char *name, int name_len) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; + u64 index; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, name_len, -1); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); + WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, + objectid, root->root_key.objectid, + dir->i_ino, &index, name, name_len); + if (ret < 0) { + BUG_ON(ret != -ENOENT); + di = btrfs_search_dir_index_item(root, path, dir->i_ino, + name, name_len); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_release_path(root, path); + index = key.offset; + } + + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + index, name, name_len, -1); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); + WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + btrfs_i_size_write(dir, dir->i_size - name_len * 2); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, dir); + BUG_ON(ret); + dir->i_sb->s_dirt = 1; + + btrfs_free_path(path); + return 0; +} + static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; unsigned long nr = 0; - /* - * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir - * the root of a subvolume or snapshot - */ if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || - inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; - } trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + err = btrfs_unlink_subvol(trans, root, dir, + BTRFS_I(inode)->location.objectid, + dentry->d_name.name, + dentry->d_name.len); + goto out; + } + err = btrfs_orphan_add(trans, inode); if (err) - goto fail_trans; + goto out; /* now the directory is empty */ err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); if (!err) btrfs_i_size_write(inode, 0); - -fail_trans: +out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) cur_offset, cur_offset + hole_size, block_end, - cur_offset, &hint_byte); + cur_offset, &hint_byte, 1); if (err) break; err = btrfs_insert_file_extent(trans, root, @@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (inode->i_nlink > 0) { + BUG_ON(btrfs_root_refs(&root->root_item) != 0); + goto no_delete; + } + btrfs_i_size_write(inode, 0); trans = btrfs_join_transaction(root, 1); @@ -3070,29 +3161,67 @@ out_err: * is kind of like crossing a mount point. */ static int fixup_tree_root_location(struct btrfs_root *root, - struct btrfs_key *location, - struct btrfs_root **sub_root, - struct dentry *dentry) + struct inode *dir, + struct dentry *dentry, + struct btrfs_key *location, + struct btrfs_root **sub_root) { - struct btrfs_root_item *ri; + struct btrfs_path *path; + struct btrfs_root *new_root; + struct btrfs_root_ref *ref; + struct extent_buffer *leaf; + int ret; + int err = 0; - if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) - return 0; - if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) - return 0; + path = btrfs_alloc_path(); + if (!path) { + err = -ENOMEM; + goto out; + } - *sub_root = btrfs_read_fs_root(root->fs_info, location, - dentry->d_name.name, - dentry->d_name.len); - if (IS_ERR(*sub_root)) - return PTR_ERR(*sub_root); + err = -ENOENT; + ret = btrfs_find_root_ref(root->fs_info->tree_root, path, + BTRFS_I(dir)->root->root_key.objectid, + location->objectid); + if (ret) { + if (ret < 0) + err = ret; + goto out; + } - ri = &(*sub_root)->root_item; - location->objectid = btrfs_root_dirid(ri); - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - location->offset = 0; + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || + btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) + goto out; - return 0; + ret = memcmp_extent_buffer(leaf, dentry->d_name.name, + (unsigned long)(ref + 1), + dentry->d_name.len); + if (ret) + goto out; + + btrfs_release_path(root->fs_info->tree_root, path); + + new_root = btrfs_read_fs_root_no_name(root->fs_info, location); + if (IS_ERR(new_root)) { + err = PTR_ERR(new_root); + goto out; + } + + if (btrfs_root_refs(&new_root->root_item) == 0) { + err = -ENOENT; + goto out; + } + + *sub_root = new_root; + location->objectid = btrfs_root_dirid(&new_root->root_item); + location->type = BTRFS_INODE_ITEM_KEY; + location->offset = 0; + err = 0; +out: + btrfs_free_path(path); + return err; } static void inode_tree_add(struct inode *inode) @@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode) struct btrfs_inode *entry; struct rb_node **p; struct rb_node *parent; - again: p = &root->inode_tree.rb_node; parent = NULL; + if (hlist_unhashed(&inode->i_hash)) + return; + spin_lock(&root->inode_lock); while (*p) { parent = *p; @@ -3132,13 +3263,87 @@ again: static void inode_tree_del(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; + int empty = 0; spin_lock(&root->inode_lock); if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); + empty = RB_EMPTY_ROOT(&root->inode_tree); } spin_unlock(&root->inode_lock); + + if (empty && btrfs_root_refs(&root->root_item) == 0) { + synchronize_srcu(&root->fs_info->subvol_srcu); + spin_lock(&root->inode_lock); + empty = RB_EMPTY_ROOT(&root->inode_tree); + spin_unlock(&root->inode_lock); + if (empty) + btrfs_add_dead_root(root); + } +} + +int btrfs_invalidate_inodes(struct btrfs_root *root) +{ + struct rb_node *node; + struct rb_node *prev; + struct btrfs_inode *entry; + struct inode *inode; + u64 objectid = 0; + + WARN_ON(btrfs_root_refs(&root->root_item) != 0); + + spin_lock(&root->inode_lock); +again: + node = root->inode_tree.rb_node; + prev = NULL; + while (node) { + prev = node; + entry = rb_entry(node, struct btrfs_inode, rb_node); + + if (objectid < entry->vfs_inode.i_ino) + node = node->rb_left; + else if (objectid > entry->vfs_inode.i_ino) + node = node->rb_right; + else + break; + } + if (!node) { + while (prev) { + entry = rb_entry(prev, struct btrfs_inode, rb_node); + if (objectid <= entry->vfs_inode.i_ino) { + node = prev; + break; + } + prev = rb_next(prev); + } + } + while (node) { + entry = rb_entry(node, struct btrfs_inode, rb_node); + objectid = entry->vfs_inode.i_ino + 1; + inode = igrab(&entry->vfs_inode); + if (inode) { + spin_unlock(&root->inode_lock); + if (atomic_read(&inode->i_count) > 1) + d_prune_aliases(inode); + /* + * btrfs_drop_inode will remove it from + * the inode cache when its usage count + * hits zero. + */ + iput(inode); + cond_resched(); + spin_lock(&root->inode_lock); + goto again; + } + + if (cond_resched_lock(&root->inode_lock)) + goto again; + + node = rb_next(node); + } + spin_unlock(&root->inode_lock); + return 0; } static noinline void init_btrfs_i(struct inode *inode) @@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return inode; } +static struct inode *new_simple_dir(struct super_block *s, + struct btrfs_key *key, + struct btrfs_root *root) +{ + struct inode *inode = new_inode(s); + + if (!inode) + return ERR_PTR(-ENOMEM); + + init_btrfs_i(inode); + + BTRFS_I(inode)->root = root; + memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); + BTRFS_I(inode)->dummy_inode = 1; + + inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + return inode; +} + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { struct inode *inode; - struct btrfs_inode *bi = BTRFS_I(dir); - struct btrfs_root *root = bi->root; + struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; struct btrfs_key location; + int index; int ret; + dentry->d_op = &btrfs_dentry_operations; + if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (ret < 0) return ERR_PTR(ret); - inode = NULL; - if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root, - dentry); - if (ret < 0) - return ERR_PTR(ret); - if (ret > 0) - return ERR_PTR(-ENOENT); + if (location.objectid == 0) + return NULL; + + if (location.type == BTRFS_INODE_ITEM_KEY) { + inode = btrfs_iget(dir->i_sb, &location, root); + return inode; + } + + BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); + + index = srcu_read_lock(&root->fs_info->subvol_srcu); + ret = fixup_tree_root_location(root, dir, dentry, + &location, &sub_root); + if (ret < 0) { + if (ret != -ENOENT) + inode = ERR_PTR(ret); + else + inode = new_simple_dir(dir->i_sb, &location, sub_root); + } else { inode = btrfs_iget(dir->i_sb, &location, sub_root); - if (IS_ERR(inode)) - return ERR_CAST(inode); } + srcu_read_unlock(&root->fs_info->subvol_srcu, index); + return inode; } +static int btrfs_dentry_delete(struct dentry *dentry) +{ + struct btrfs_root *root; + + if (!dentry->d_inode) + return 0; + + root = BTRFS_I(dentry->d_inode)->root; + if (btrfs_root_refs(&root->root_item) == 0) + return 1; + return 0; +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode; - if (dentry->d_name.len > BTRFS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - inode = btrfs_lookup_dentry(dir, dentry); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (ret != 0) goto fail; - if (objectid > root->highest_inode) - root->highest_inode = objectid; - inode->i_uid = current_fsuid(); if (dir && (dir->i_mode & S_ISGID)) { @@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index) { - int ret; + int ret = 0; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(parent_inode)->root; - key.objectid = inode->i_ino; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; + if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); + } else { + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + } + + if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, + key.objectid, root->root_key.objectid, + parent_inode->i_ino, + index, name, name_len); + } else if (add_backref) { + ret = btrfs_insert_inode_ref(trans, root, + name, name_len, inode->i_ino, + parent_inode->i_ino, index); + } - ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode->i_ino, - &key, btrfs_inode_type(inode), - index); if (ret == 0) { - if (add_backref) { - ret = btrfs_insert_inode_ref(trans, root, - name, name_len, - inode->i_ino, - parent_inode->i_ino, - index); - } + ret = btrfs_insert_dir_item(trans, root, name, name_len, + parent_inode->i_ino, &key, + btrfs_inode_type(inode), index); + BUG_ON(ret); + btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; @@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, err = btrfs_add_nondir(trans, dentry, inode, 1, index); - if (err) - drop_inode = 1; - - btrfs_update_inode_block_group(trans, dir); - err = btrfs_update_inode(trans, root, inode); - - if (err) + if (err) { drop_inode = 1; + } else { + btrfs_update_inode_block_group(trans, dir); + err = btrfs_update_inode(trans, root, inode); + BUG_ON(err); + btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); + } nr = trans->blocks_used; - - btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); btrfs_end_transaction_throttle(trans, root); fail: if (drop_inode) { @@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, int compressed; again: - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) em->bdev = root->fs_info->fs_devices->latest_bdev; - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (em) { if (em->start > start || em->start + em->len <= start) @@ -4215,6 +4471,11 @@ again: map = kmap(page); read_extent_buffer(leaf, map + pg_offset, ptr, copy_size); + if (pg_offset + copy_size < PAGE_CACHE_SIZE) { + memset(map + pg_offset + copy_size, 0, + PAGE_CACHE_SIZE - pg_offset - + copy_size); + } kunmap(page); } flush_dcache_page(page); @@ -4259,7 +4520,7 @@ insert: } err = 0; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); /* it is possible that someone inserted the extent into the tree * while we had the lock dropped. It is also possible that @@ -4299,7 +4560,7 @@ insert: err = 0; } } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); out: if (path) btrfs_free_path(path); @@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) u64 page_start = page_offset(page); u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + + /* + * we have the page locked, so new writeback can't start, + * and the dirty bit won't be cleared while we are here. + * + * Wait for IO on this page so that we can safely clear + * the PagePrivate2 bit and do ordered accounting + */ wait_on_page_writeback(page); + tree = &BTRFS_I(page->mapping->host)->io_tree; if (offset) { btrfs_releasepage(page, GFP_NOFS); return; } - lock_extent(tree, page_start, page_end, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); @@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_LOCKED, 1, 0, GFP_NOFS); - btrfs_finish_ordered_io(page->mapping->host, - page_start, page_end); + EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); + /* + * whoever cleared the private bit is responsible + * for the finish_ordered_io + */ + if (TestClearPagePrivate2(page)) { + btrfs_finish_ordered_io(page->mapping->host, + page_start, page_end); + } btrfs_put_ordered_extent(ordered); lock_extent(tree, page_start, page_end, GFP_NOFS); } clear_extent_bit(tree, page_start, page_end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_ORDERED, - 1, 1, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, + 1, 1, NULL, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); ClearPageChecked(page); @@ -4521,11 +4795,14 @@ again: } ClearPageChecked(page); set_page_dirty(page); + SetPageUptodate(page); BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: + if (!ret) + return VM_FAULT_LOCKED; unlock_page(page); out: return ret; @@ -4594,11 +4871,11 @@ out: * create a new subvolume directory/inode (helper for the ioctl). */ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, struct dentry *dentry, + struct btrfs_root *new_root, u64 new_dirid, u64 alloc_hint) { struct inode *inode; - int error; + int err; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, @@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, inode->i_nlink = 1; btrfs_i_size_write(inode, 0); - error = btrfs_update_inode(trans, new_root, inode); - if (error) - return error; + err = btrfs_update_inode(trans, new_root, inode); + BUG_ON(err); - d_instantiate(dentry, inode); + iput(inode); return 0; } @@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode) kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } +void btrfs_drop_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) + generic_delete_inode(inode); + else + generic_drop_inode(inode); +} + static void init_once(void *foo) { struct btrfs_inode *ei = (struct btrfs_inode *) foo; @@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(old_dir)->root; + struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; u64 index = 0; + u64 root_objectid; int ret; - /* we're not allowed to rename between subvolumes */ - if (BTRFS_I(old_inode)->root->root_key.objectid != - BTRFS_I(new_dir)->root->root_key.objectid) + if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return -EPERM; + + /* we only allow rename subvolume link between subvolumes */ + if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) return -EXDEV; - if (S_ISDIR(old_inode->i_mode) && new_inode && - new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { + if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || + (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) return -ENOTEMPTY; - } - /* to rename a snapshot or subvolume, we need to juggle the - * backrefs. This isn't coded yet - */ - if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) - return -EXDEV; + if (S_ISDIR(old_inode->i_mode) && new_inode && + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) + return -ENOTEMPTY; ret = btrfs_check_metadata_free_space(root); if (ret) - goto out_unlock; + return ret; /* * we're using rename to replace one file with another. @@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(old_inode->i_mapping); + /* close the racy window with snapshot create/destroy ioctl */ + if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + down_read(&root->fs_info->subvol_sem); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); + + if (dest != root) + btrfs_record_root_in_trans(trans, dest); + ret = btrfs_set_inode_index(new_dir, &index); + if (ret) + goto out_fail; + + if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + /* force full log commit if subvolume involved. */ + root->fs_info->last_trans_log_full_commit = trans->transid; + } else { + ret = btrfs_insert_inode_ref(trans, dest, + new_dentry->d_name.name, + new_dentry->d_name.len, + old_inode->i_ino, + new_dir->i_ino, index); + if (ret) + goto out_fail; + /* + * this is an ugly little race, but the rename is required + * to make sure that if we crash, the inode is either at the + * old name or the new one. pinning the log transaction lets + * us make sure we don't allow a log commit to come in after + * we unlink the name but before we add the new name back in. + */ + btrfs_pin_log_trans(root); + } /* * make sure the inode gets flushed if it is replacing * something. @@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, btrfs_add_ordered_operation(trans, root, old_inode); } - /* - * this is an ugly little race, but the rename is required to make - * sure that if we crash, the inode is either at the old name - * or the new one. pinning the log transaction lets us make sure - * we don't allow a log commit to come in after we unlink the - * name but before we add the new name back in. - */ - btrfs_pin_log_trans(root); - - btrfs_set_trans_block_group(trans, new_dir); - - btrfs_inc_nlink(old_dentry->d_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; @@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); - ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, - old_dentry->d_name.name, - old_dentry->d_name.len); - if (ret) - goto out_fail; + if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; + ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, + old_dentry->d_name.name, + old_dentry->d_name.len); + } else { + btrfs_inc_nlink(old_dentry->d_inode); + ret = btrfs_unlink_inode(trans, root, old_dir, + old_dentry->d_inode, + old_dentry->d_name.name, + old_dentry->d_name.len); + } + BUG_ON(ret); if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_inode(trans, root, new_dir, - new_dentry->d_inode, - new_dentry->d_name.name, - new_dentry->d_name.len); - if (ret) - goto out_fail; + if (unlikely(new_inode->i_ino == + BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + root_objectid = BTRFS_I(new_inode)->location.objectid; + ret = btrfs_unlink_subvol(trans, dest, new_dir, + root_objectid, + new_dentry->d_name.name, + new_dentry->d_name.len); + BUG_ON(new_inode->i_nlink == 0); + } else { + ret = btrfs_unlink_inode(trans, dest, new_dir, + new_dentry->d_inode, + new_dentry->d_name.name, + new_dentry->d_name.len); + } + BUG_ON(ret); if (new_inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, new_dentry->d_inode); - if (ret) - goto out_fail; + BUG_ON(ret); } - } - ret = btrfs_set_inode_index(new_dir, &index); - if (ret) - goto out_fail; - ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, - old_inode, new_dentry->d_name.name, - new_dentry->d_name.len, 1, index); - if (ret) - goto out_fail; + ret = btrfs_add_link(trans, new_dir, old_inode, + new_dentry->d_name.name, + new_dentry->d_name.len, 0, index); + BUG_ON(ret); - btrfs_log_new_name(trans, old_inode, old_dir, - new_dentry->d_parent); + if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { + btrfs_log_new_name(trans, old_inode, old_dir, + new_dentry->d_parent); + btrfs_end_log_trans(root); + } out_fail: - - /* this btrfs_end_log_trans just allows the current - * log-sub transaction to complete - */ - btrfs_end_log_trans(root); btrfs_end_transaction_throttle(trans, root); -out_unlock: + + if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + up_read(&root->fs_info->subvol_sem); return ret; } @@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); + btrfs_drop_extent_cache(inode, cur_offset, + cur_offset + ins.offset -1, 0); num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; @@ -5223,6 +5543,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, }; + static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, @@ -5269,6 +5590,7 @@ static const struct address_space_operations btrfs_aops = { .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, .set_page_dirty = btrfs_set_page_dirty, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations btrfs_symlink_aops = { @@ -5309,3 +5631,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, }; + +struct dentry_operations btrfs_dentry_operations = { + .d_delete = btrfs_dentry_delete, +}; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd88f25889f..a8577a7f26a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root, struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; - struct btrfs_root *new_root = root; - struct inode *dir; + struct btrfs_root *new_root; + struct inode *dir = dentry->d_parent->d_inode; int ret; int err; u64 objectid; @@ -241,7 +241,7 @@ static noinline int create_subvol(struct btrfs_root *root, ret = btrfs_check_metadata_free_space(root); if (ret) - goto fail_commit; + return ret; trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -304,11 +304,17 @@ static noinline int create_subvol(struct btrfs_root *root, if (ret) goto fail; + key.offset = (u64)-1; + new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); + BUG_ON(IS_ERR(new_root)); + + btrfs_record_root_in_trans(trans, new_root); + + ret = btrfs_create_subvol_root(trans, new_root, new_dirid, + BTRFS_I(dir)->block_group); /* * insert the directory item */ - key.offset = (u64)-1; - dir = dentry->d_parent->d_inode; ret = btrfs_set_inode_index(dir, &index); BUG_ON(ret); @@ -322,44 +328,18 @@ static noinline int create_subvol(struct btrfs_root *root, ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); - /* add the backref first */ ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, - objectid, BTRFS_ROOT_BACKREF_KEY, - root->root_key.objectid, + objectid, root->root_key.objectid, dir->i_ino, index, name, namelen); BUG_ON(ret); - /* now add the forward ref */ - ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, - root->root_key.objectid, BTRFS_ROOT_REF_KEY, - objectid, - dir->i_ino, index, name, namelen); - - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto fail_commit; - - new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); - BUG_ON(!new_root); - - trans = btrfs_start_transaction(new_root, 1); - BUG_ON(!trans); - - ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, - BTRFS_I(dir)->block_group); - if (ret) - goto fail; - + d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: nr = trans->blocks_used; - err = btrfs_commit_transaction(trans, new_root); + err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; -fail_commit: - btrfs_btree_balance_dirty(root, nr); return ret; } @@ -420,14 +400,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) * sys_mkdirat and vfs_mkdir, but we only do a single component lookup * inside this filesystem so it's quite a bit simpler. */ -static noinline int btrfs_mksubvol(struct path *parent, char *name, - int mode, int namelen, +static noinline int btrfs_mksubvol(struct path *parent, + char *name, int namelen, struct btrfs_root *snap_src) { + struct inode *dir = parent->dentry->d_inode; struct dentry *dentry; int error; - mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); dentry = lookup_one_len(name, parent->dentry, namelen); error = PTR_ERR(dentry); @@ -438,99 +419,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, if (dentry->d_inode) goto out_dput; - if (!IS_POSIXACL(parent->dentry->d_inode)) - mode &= ~current_umask(); - error = mnt_want_write(parent->mnt); if (error) goto out_dput; - error = btrfs_may_create(parent->dentry->d_inode, dentry); + error = btrfs_may_create(dir, dentry); if (error) goto out_drop_write; - /* - * Actually perform the low-level subvolume creation after all - * this VFS fuzz. - * - * Eventually we want to pass in an inode under which we create this - * subvolume, but for now all are under the filesystem root. - * - * Also we should pass on the mode eventually to allow creating new - * subvolume with specific mode bits. - */ + down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); + + if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) + goto out_up_read; + if (snap_src) { - struct dentry *dir = dentry->d_parent; - struct dentry *test = dir->d_parent; - struct btrfs_path *path = btrfs_alloc_path(); - int ret; - u64 test_oid; - u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid; - - test_oid = snap_src->root_key.objectid; - - ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, - path, parent_oid, test_oid); - if (ret == 0) - goto create; - btrfs_release_path(snap_src->fs_info->tree_root, path); - - /* we need to make sure we aren't creating a directory loop - * by taking a snapshot of something that has our current - * subvol in its directory tree. So, this loops through - * the dentries and checks the forward refs for each subvolume - * to see if is references the subvolume where we are - * placing this new snapshot. - */ - while (1) { - if (!test || - dir == snap_src->fs_info->sb->s_root || - test == snap_src->fs_info->sb->s_root || - test->d_inode->i_sb != snap_src->fs_info->sb) { - break; - } - if (S_ISLNK(test->d_inode->i_mode)) { - printk(KERN_INFO "Btrfs symlink in snapshot " - "path, failed\n"); - error = -EMLINK; - btrfs_free_path(path); - goto out_drop_write; - } - test_oid = - BTRFS_I(test->d_inode)->root->root_key.objectid; - ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, - path, test_oid, parent_oid); - if (ret == 0) { - printk(KERN_INFO "Btrfs snapshot creation " - "failed, looping\n"); - error = -EMLINK; - btrfs_free_path(path); - goto out_drop_write; - } - btrfs_release_path(snap_src->fs_info->tree_root, path); - test = test->d_parent; - } -create: - btrfs_free_path(path); - error = create_snapshot(snap_src, dentry, name, namelen); + error = create_snapshot(snap_src, dentry, + name, namelen); } else { - error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, - dentry, name, namelen); + error = create_subvol(BTRFS_I(dir)->root, dentry, + name, namelen); } - if (error) - goto out_drop_write; - - fsnotify_mkdir(parent->dentry->d_inode, dentry); + if (!error) + fsnotify_mkdir(dir, dentry); +out_up_read: + up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); out_drop_write: mnt_drop_write(parent->mnt); out_dput: dput(dentry); out_unlock: - mutex_unlock(&parent->dentry->d_inode->i_mutex); + mutex_unlock(&dir->i_mutex); return error; } - static int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; @@ -596,9 +517,8 @@ again: clear_page_dirty_for_io(page); btrfs_set_extent_delalloc(inode, page_start, page_end); - - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); @@ -609,7 +529,8 @@ out_unlock: return 0; } -static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) +static noinline int btrfs_ioctl_resize(struct btrfs_root *root, + void __user *arg) { u64 new_size; u64 old_size; @@ -718,10 +639,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, { struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; - struct btrfs_dir_item *di; - struct btrfs_path *path; struct file *src_file; - u64 root_dirid; int namelen; int ret = 0; @@ -739,32 +657,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, goto out; } - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, - path, root_dirid, - vol_args->name, namelen, 0); - btrfs_free_path(path); - - if (di && !IS_ERR(di)) { - ret = -EEXIST; - goto out; - } - - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; - } - if (subvol) { - ret = btrfs_mksubvol(&file->f_path, vol_args->name, - file->f_path.dentry->d_inode->i_mode, - namelen, NULL); + ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, + NULL); } else { struct inode *src_inode; src_file = fget(vol_args->fd); @@ -781,17 +676,156 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, fput(src_file); goto out; } - ret = btrfs_mksubvol(&file->f_path, vol_args->name, - file->f_path.dentry->d_inode->i_mode, - namelen, BTRFS_I(src_inode)->root); + ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, + BTRFS_I(src_inode)->root); fput(src_file); } - out: kfree(vol_args); return ret; } +/* + * helper to check if the subvolume references other subvolumes + */ +static noinline int may_destroy_subvol(struct btrfs_root *root) +{ + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = root->root_key.objectid; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root->fs_info->tree_root, + &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + ret = 0; + if (path->slots[0] > 0) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == root->root_key.objectid && + key.type == BTRFS_ROOT_REF_KEY) + ret = -ENOTEMPTY; + } +out: + btrfs_free_path(path); + return ret; +} + +static noinline int btrfs_ioctl_snap_destroy(struct file *file, + void __user *arg) +{ + struct dentry *parent = fdentry(file); + struct dentry *dentry; + struct inode *dir = parent->d_inode; + struct inode *inode; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_root *dest = NULL; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + int namelen; + int ret; + int err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + vol_args = memdup_user(arg, sizeof(*vol_args)); + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); + + vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; + namelen = strlen(vol_args->name); + if (strchr(vol_args->name, '/') || + strncmp(vol_args->name, "..", namelen) == 0) { + err = -EINVAL; + goto out; + } + + err = mnt_want_write(file->f_path.mnt); + if (err) + goto out; + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(vol_args->name, parent, namelen); + if (IS_ERR(dentry)) { + err = PTR_ERR(dentry); + goto out_unlock_dir; + } + + if (!dentry->d_inode) { + err = -ENOENT; + goto out_dput; + } + + inode = dentry->d_inode; + if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { + err = -EINVAL; + goto out_dput; + } + + dest = BTRFS_I(inode)->root; + + mutex_lock(&inode->i_mutex); + err = d_invalidate(dentry); + if (err) + goto out_unlock; + + down_write(&root->fs_info->subvol_sem); + + err = may_destroy_subvol(dest); + if (err) + goto out_up_write; + + trans = btrfs_start_transaction(root, 1); + ret = btrfs_unlink_subvol(trans, root, dir, + dest->root_key.objectid, + dentry->d_name.name, + dentry->d_name.len); + BUG_ON(ret); + + btrfs_record_root_in_trans(trans, dest); + + memset(&dest->root_item.drop_progress, 0, + sizeof(dest->root_item.drop_progress)); + dest->root_item.drop_level = 0; + btrfs_set_root_refs(&dest->root_item, 0); + + ret = btrfs_insert_orphan_item(trans, + root->fs_info->tree_root, + dest->root_key.objectid); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + inode->i_flags |= S_DEAD; +out_up_write: + up_write(&root->fs_info->subvol_sem); +out_unlock: + mutex_unlock(&inode->i_mutex); + if (!err) { + btrfs_invalidate_inodes(dest); + d_delete(dentry); + } +out_dput: + dput(dentry); +out_unlock_dir: + mutex_unlock(&dir->i_mutex); + mnt_drop_write(file->f_path.mnt); +out: + kfree(vol_args); + return err; +} + static int btrfs_ioctl_defrag(struct file *file) { struct inode *inode = fdentry(file)->d_inode; @@ -865,8 +899,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) return ret; } -static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, - u64 off, u64 olen, u64 destoff) +static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, + u64 off, u64 olen, u64 destoff) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -976,7 +1010,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, /* punch hole in destination first */ btrfs_drop_extents(trans, root, inode, off, off + len, - off + len, 0, &hint_byte); + off + len, 0, &hint_byte, 1); /* clone data */ key.objectid = src->i_ino; @@ -1071,8 +1105,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, datao += off - key.offset; datal -= off - key.offset; } - if (key.offset + datao + datal + key.offset > - off + len) + if (key.offset + datao + datal > off + len) datal = off + len - key.offset - datao; /* disko == 0 means it's a hole */ if (!disko) @@ -1258,6 +1291,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SUBVOL_CREATE: return btrfs_ioctl_snap_create(file, argp, 1); + case BTRFS_IOC_SNAP_DESTROY: + return btrfs_ioctl_snap_destroy(file, argp); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b320b103fa1..bc49914475e 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args { #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ struct btrfs_ioctl_vol_args) - +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7b2f401e604..b5d6d24726b 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, * * len is the length of the extent * - * This also sets the EXTENT_ORDERED bit on the range in the inode. - * * The tree is given a single reference on the ordered extent that was * inserted. */ @@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->start = start; entry->len = len; entry->disk_len = disk_len; + entry->bytes_left = len; entry->inode = inode; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, &entry->rb_node); BUG_ON(node); - set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, - entry_end(entry) - 1, GFP_NOFS); - spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); list_add_tail(&entry->root_extent_list, &BTRFS_I(inode)->root->fs_info->ordered_extents); @@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; int ret; tree = &BTRFS_I(inode)->ordered_tree; mutex_lock(&tree->mutex); - clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, - GFP_NOFS); node = tree_search(tree, file_offset); if (!node) { ret = 1; @@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, goto out; } - ret = test_range_bit(io_tree, entry->file_offset, - entry->file_offset + entry->len - 1, - EXTENT_ORDERED, 0); - if (ret == 0) + if (io_size > entry->bytes_left) { + printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", + (unsigned long long)entry->bytes_left, + (unsigned long long)io_size); + } + entry->bytes_left -= io_size; + if (entry->bytes_left == 0) ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); + else + ret = 1; out: mutex_unlock(&tree->mutex); return ret == 0; @@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) u64 orig_end; u64 wait_end; struct btrfs_ordered_extent *ordered; + int found; if (start + len < start) { orig_end = INT_LIMIT(loff_t); @@ -502,6 +501,7 @@ again: orig_end >> PAGE_CACHE_SHIFT); end = orig_end; + found = 0; while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); if (!ordered) @@ -514,6 +514,7 @@ again: btrfs_put_ordered_extent(ordered); break; } + found++; btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; btrfs_put_ordered_extent(ordered); @@ -521,8 +522,8 @@ again: break; end--; } - if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, - EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { + if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, + EXTENT_DELALLOC, 0, NULL)) { schedule_timeout(1); goto again; } @@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, */ if (test_range_bit(io_tree, disk_i_size, ordered->file_offset + ordered->len - 1, - EXTENT_DELALLOC, 0)) { + EXTENT_DELALLOC, 0, NULL)) { goto out; } /* @@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, */ if (i_size_test > entry_end(ordered) && !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, - EXTENT_DELALLOC, 0)) { + EXTENT_DELALLOC, 0, NULL)) { new_i_size = min_t(u64, i_size_test, i_size_read(inode)); } BTRFS_I(inode)->disk_i_size = new_i_size; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3d31c8827b0..993a7ea45c7 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -85,6 +85,9 @@ struct btrfs_ordered_extent { /* extent length on disk */ u64 disk_len; + /* number of bytes that still need writing */ + u64 bytes_left; + /* flags (described above) */ unsigned long flags; diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 3c0d52af4f8..79cba5fbc28 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c @@ -65,3 +65,23 @@ out: btrfs_free_path(path); return ret; } + +int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset) +{ + struct btrfs_path *path; + struct btrfs_key key; + int ret; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = offset; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c04f7f21260..361ad323faa 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -121,6 +121,15 @@ struct inodevec { int nr; }; +#define MAX_EXTENTS 128 + +struct file_extent_cluster { + u64 start; + u64 end; + u64 boundary[MAX_EXTENTS]; + unsigned int nr; +}; + struct reloc_control { /* block group to relocate */ struct btrfs_block_group_cache *block_group; @@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, struct reloc_control *rc) { if (test_range_bit(&rc->processed_blocks, bytenr, - bytenr + blocksize - 1, EXTENT_DIRTY, 1)) + bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) return 1; return 0; } @@ -2529,56 +2538,94 @@ out: } static noinline_for_stack -int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +int setup_extent_mapping(struct inode *inode, u64 start, u64 end, + u64 block_start) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret = 0; + + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; + + em->start = start; + em->len = end + 1 - start; + em->block_len = em->len; + em->block_start = block_start; + em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); + + lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + while (1) { + write_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(inode, start, end, 0); + } + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + return ret; +} + +static int relocate_file_extent_cluster(struct inode *inode, + struct file_extent_cluster *cluster) { u64 page_start; u64 page_end; - unsigned long i; - unsigned long first_index; + u64 offset = BTRFS_I(inode)->index_cnt; + unsigned long index; unsigned long last_index; - unsigned int total_read = 0; - unsigned int total_dirty = 0; + unsigned int dirty_page = 0; struct page *page; struct file_ra_state *ra; - struct btrfs_ordered_extent *ordered; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + int nr = 0; int ret = 0; + if (!cluster->nr) + return 0; + ra = kzalloc(sizeof(*ra), GFP_NOFS); if (!ra) return -ENOMEM; + index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; + last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; + mutex_lock(&inode->i_mutex); - first_index = start >> PAGE_CACHE_SHIFT; - last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - /* make sure the dirty trick played by the caller work */ - while (1) { - ret = invalidate_inode_pages2_range(inode->i_mapping, - first_index, last_index); - if (ret != -EBUSY) - break; - schedule_timeout(HZ/10); - } + i_size_write(inode, cluster->end + 1 - offset); + ret = setup_extent_mapping(inode, cluster->start - offset, + cluster->end - offset, cluster->start); if (ret) goto out_unlock; file_ra_state_init(ra, inode->i_mapping); - for (i = first_index ; i <= last_index; i++) { - if (total_read % ra->ra_pages == 0) { - btrfs_force_ra(inode->i_mapping, ra, NULL, i, - min(last_index, ra->ra_pages + i - 1)); - } - total_read++; -again: - if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) - BUG_ON(1); - page = grab_cache_page(inode->i_mapping, i); + WARN_ON(cluster->start != cluster->boundary[0]); + while (index <= last_index) { + page = find_lock_page(inode->i_mapping, index); if (!page) { - ret = -ENOMEM; - goto out_unlock; + page_cache_sync_readahead(inode->i_mapping, + ra, NULL, index, + last_index + 1 - index); + page = grab_cache_page(inode->i_mapping, index); + if (!page) { + ret = -ENOMEM; + goto out_unlock; + } + } + + if (PageReadahead(page)) { + page_cache_async_readahead(inode->i_mapping, + ra, NULL, page, index, + last_index + 1 - index); } + if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -2589,75 +2636,79 @@ again: goto out_unlock; } } - wait_on_page_writeback(page); page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(io_tree, page_start, page_end, GFP_NOFS); - - ordered = btrfs_lookup_ordered_extent(inode, page_start); - if (ordered) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); - unlock_page(page); - page_cache_release(page); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - goto again; - } + + lock_extent(&BTRFS_I(inode)->io_tree, + page_start, page_end, GFP_NOFS); + set_page_extent_mapped(page); - if (i == first_index) - set_extent_bits(io_tree, page_start, page_end, + if (nr < cluster->nr && + page_start + offset == cluster->boundary[nr]) { + set_extent_bits(&BTRFS_I(inode)->io_tree, + page_start, page_end, EXTENT_BOUNDARY, GFP_NOFS); + nr++; + } btrfs_set_extent_delalloc(inode, page_start, page_end); set_page_dirty(page); - total_dirty++; + dirty_page++; - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent(&BTRFS_I(inode)->io_tree, + page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); + + index++; + if (nr < cluster->nr && + page_end + 1 + offset == cluster->boundary[nr]) { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + dirty_page); + dirty_page = 0; + } + } + if (dirty_page) { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + dirty_page); } + WARN_ON(nr != cluster->nr); out_unlock: mutex_unlock(&inode->i_mutex); kfree(ra); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); return ret; } static noinline_for_stack -int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) +int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, + struct file_extent_cluster *cluster) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_map *em; - u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; - u64 end = start + extent_key->offset - 1; - - em = alloc_extent_map(GFP_NOFS); - em->start = start; - em->len = extent_key->offset; - em->block_len = extent_key->offset; - em->block_start = extent_key->objectid; - em->bdev = root->fs_info->fs_devices->latest_bdev; - set_bit(EXTENT_FLAG_PINNED, &em->flags); + int ret; - /* setup extent map to cheat btrfs_readpage */ - lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); - while (1) { - int ret; - spin_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(inode, start, end, 0); + if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { + ret = relocate_file_extent_cluster(inode, cluster); + if (ret) + return ret; + cluster->nr = 0; } - unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); - return relocate_inode_pages(inode, start, extent_key->offset); + if (!cluster->nr) + cluster->start = extent_key->objectid; + else + BUG_ON(cluster->nr >= MAX_EXTENTS); + cluster->end = extent_key->objectid + extent_key->offset - 1; + cluster->boundary[cluster->nr] = extent_key->objectid; + cluster->nr++; + + if (cluster->nr >= MAX_EXTENTS) { + ret = relocate_file_extent_cluster(inode, cluster); + if (ret) + return ret; + cluster->nr = 0; + } + return 0; } #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 @@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags) return 0; } + static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; struct btrfs_key key; + struct file_extent_cluster *cluster; struct btrfs_trans_handle *trans = NULL; struct btrfs_path *path; struct btrfs_extent_item *ei; @@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) int ret; int err = 0; + cluster = kzalloc(sizeof(*cluster), GFP_NOFS); + if (!cluster) + return -ENOMEM; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; + rc->extents_found = 0; + rc->extents_skipped = 0; + rc->search_start = rc->block_group->key.objectid; clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, GFP_NOFS); @@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) } nr = trans->blocks_used; - btrfs_end_transaction_throttle(trans, rc->extent_root); + btrfs_end_transaction(trans, rc->extent_root); trans = NULL; btrfs_btree_balance_dirty(rc->extent_root, nr); if (rc->stage == MOVE_DATA_EXTENTS && (flags & BTRFS_EXTENT_FLAG_DATA)) { rc->found_file_extent = 1; - ret = relocate_data_extent(rc->data_inode, &key); + ret = relocate_data_extent(rc->data_inode, + &key, cluster); if (ret < 0) { err = ret; break; @@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) btrfs_btree_balance_dirty(rc->extent_root, nr); } + if (!err) { + ret = relocate_file_extent_cluster(rc->data_inode, cluster); + if (ret < 0) + err = ret; + } + + kfree(cluster); + rc->create_reloc_root = 0; smp_mb(); @@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) } static int __insert_orphan_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 size) + struct btrfs_root *root, u64 objectid) { struct btrfs_path *path; struct btrfs_inode_item *item; @@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); btrfs_set_inode_generation(leaf, item, 1); - btrfs_set_inode_size(leaf, item, size); + btrfs_set_inode_size(leaf, item, 0); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); btrfs_mark_buffer_dirty(leaf); @@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, if (err) goto out; - err = __insert_orphan_inode(trans, root, objectid, group->key.offset); - BUG_ON(err); - - err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, - group->key.offset, 0, group->key.offset, - 0, 0, 0); + err = __insert_orphan_inode(trans, root, objectid); BUG_ON(err); key.objectid = objectid; @@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) btrfs_wait_ordered_extents(fs_info->tree_root, 0); while (1) { - mutex_lock(&fs_info->cleaner_mutex); - btrfs_clean_old_snapshots(fs_info->tree_root); - mutex_unlock(&fs_info->cleaner_mutex); - rc->extents_found = 0; rc->extents_skipped = 0; + mutex_lock(&fs_info->cleaner_mutex); + + btrfs_clean_old_snapshots(fs_info->tree_root); ret = relocate_block_group(rc); + + mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { err = ret; break; @@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) } } - filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, - rc->block_group->key.objectid, - rc->block_group->key.objectid + - rc->block_group->key.offset - 1); + filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, + rc->block_group->key.objectid, + rc->block_group->key.objectid + + rc->block_group->key.offset - 1); WARN_ON(rc->block_group->pinned > 0); WARN_ON(rc->block_group->reserved > 0); @@ -3530,6 +3594,26 @@ out: return err; } +static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + int ret; + + trans = btrfs_start_transaction(root->fs_info->tree_root, 1); + + memset(&root->root_item.drop_progress, 0, + sizeof(root->root_item.drop_progress)); + root->root_item.drop_level = 0; + btrfs_set_root_refs(&root->root_item, 0); + ret = btrfs_update_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); + + ret = btrfs_end_transaction(trans, root->fs_info->tree_root); + BUG_ON(ret); + return 0; +} + /* * recover relocation interrupted by system crash. * @@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root) fs_root = read_fs_root(root->fs_info, reloc_root->root_key.offset); if (IS_ERR(fs_root)) { - err = PTR_ERR(fs_root); - goto out; + ret = PTR_ERR(fs_root); + if (ret != -ENOENT) { + err = ret; + goto out; + } + mark_garbage_root(reloc_root); } } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ddc6d61c55..9351428f30e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, goto out; BUG_ON(ret == 0); + if (path->slots[0] == 0) { + ret = 1; + goto out; + } l = path->nodes[0]; - BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; btrfs_item_key_to_cpu(l, &found_key, slot); - if (found_key.objectid != objectid) { + if (found_key.objectid != objectid || + found_key.type != BTRFS_ROOT_ITEM_KEY) { ret = 1; goto out; } - read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), - sizeof(*item)); - memcpy(key, &found_key, sizeof(found_key)); + if (item) + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + if (key) + memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: btrfs_free_path(path); @@ -249,6 +255,59 @@ err: return ret; } +int btrfs_find_orphan_roots(struct btrfs_root *tree_root) +{ + struct extent_buffer *leaf; + struct btrfs_path *path; + struct btrfs_key key; + int err = 0; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_ORPHAN_OBJECTID; + key.type = BTRFS_ORPHAN_ITEM_KEY; + key.offset = 0; + + while (1) { + ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + if (ret < 0) { + err = ret; + break; + } + + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(tree_root, path); + if (ret < 0) + err = ret; + if (ret != 0) + break; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_release_path(tree_root, path); + + if (key.objectid != BTRFS_ORPHAN_OBJECTID || + key.type != BTRFS_ORPHAN_ITEM_KEY) + break; + + ret = btrfs_find_dead_roots(tree_root, key.offset); + if (ret) { + err = ret; + break; + } + + key.offset++; + } + + btrfs_free_path(path); + return err; +} + /* drop the root item for 'key' from 'root' */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) @@ -278,31 +337,57 @@ out: return ret; } -#if 0 /* this will get used when snapshot deletion is implemented */ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, - u64 root_id, u8 type, u64 ref_id) + u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, + const char *name, int name_len) + { + struct btrfs_path *path; + struct btrfs_root_ref *ref; + struct extent_buffer *leaf; struct btrfs_key key; + unsigned long ptr; + int err = 0; int ret; - struct btrfs_path *path; path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; key.objectid = root_id; - key.type = type; + key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = ref_id; - +again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - BUG_ON(ret); - - ret = btrfs_del_item(trans, tree_root, path); - BUG_ON(ret); + BUG_ON(ret < 0); + if (ret == 0) { + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_root_ref); + + WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); + WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); + ptr = (unsigned long)(ref + 1); + WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); + *sequence = btrfs_root_ref_sequence(leaf, ref); + + ret = btrfs_del_item(trans, tree_root, path); + BUG_ON(ret); + } else + err = -ENOENT; + + if (key.type == BTRFS_ROOT_BACKREF_KEY) { + btrfs_release_path(tree_root, path); + key.objectid = ref_id; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = root_id; + goto again; + } btrfs_free_path(path); - return ret; + return err; } -#endif int btrfs_find_root_ref(struct btrfs_root *tree_root, struct btrfs_path *path, @@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, return ret; } - /* * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY * or BTRFS_ROOT_BACKREF_KEY. @@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, */ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root, - u64 root_id, u8 type, u64 ref_id, - u64 dirid, u64 sequence, + u64 root_id, u64 ref_id, u64 dirid, u64 sequence, const char *name, int name_len) { struct btrfs_key key; @@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; unsigned long ptr; - path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; key.objectid = root_id; - key.type = type; + key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = ref_id; - +again: ret = btrfs_insert_empty_item(trans, tree_root, path, &key, sizeof(*ref) + name_len); BUG_ON(ret); @@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, name, ptr, name_len); btrfs_mark_buffer_dirty(leaf); + if (key.type == BTRFS_ROOT_BACKREF_KEY) { + btrfs_release_path(tree_root, path); + key.objectid = ref_id; + key.type = BTRFS_ROOT_REF_KEY; + key.offset = root_id; + goto again; + } + btrfs_free_path(path); - return ret; + return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2db17cd66fc..67035385444 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -676,6 +676,7 @@ static int btrfs_unfreeze(struct super_block *sb) } static const struct super_operations btrfs_super_ops = { + .drop_inode = btrfs_drop_inode, .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cdbb5022da5..88f866f85e7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, { if (root->ref_cows && root->last_trans < trans->transid) { WARN_ON(root == root->fs_info->extent_root); - WARN_ON(root->root_item.refs == 0); WARN_ON(root->commit_root != root->node); radix_tree_tag_set(&root->fs_info->fs_roots_radix, @@ -720,7 +719,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); key.objectid = objectid; - key.offset = 0; + /* record when the snapshot was created in key.offset */ + key.offset = trans->transid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); old = btrfs_lock_root_node(root); @@ -778,24 +778,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); - /* add the backref first */ ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, pending->root_key.objectid, - BTRFS_ROOT_BACKREF_KEY, parent_root->root_key.objectid, parent_inode->i_ino, index, pending->name, namelen); BUG_ON(ret); - /* now add the forward ref */ - ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, - parent_root->root_key.objectid, - BTRFS_ROOT_REF_KEY, - pending->root_key.objectid, - parent_inode->i_ino, index, pending->name, - namelen); - inode = btrfs_lookup_dentry(parent_inode, pending->dentry); d_instantiate(pending->dentry, inode); fail: @@ -874,7 +864,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, unsigned long timeout = 1; struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; - struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; int should_grow = 0; @@ -915,13 +904,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return 0; } - pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); - if (!pinned_copy) - return -ENOMEM; - - extent_io_tree_init(pinned_copy, - root->fs_info->btree_inode->i_mapping, GFP_NOFS); - trans->transaction->in_commit = 1; trans->transaction->blocked = 1; if (cur_trans->list.prev != &root->fs_info->trans_list) { @@ -1019,6 +1001,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = commit_cowonly_roots(trans, root); BUG_ON(ret); + btrfs_prepare_extent_commit(trans, root); + cur_trans = root->fs_info->running_transaction; spin_lock(&root->fs_info->new_trans_lock); root->fs_info->running_transaction = NULL; @@ -1042,8 +1026,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, sizeof(root->fs_info->super_copy)); - btrfs_copy_pinned(root, pinned_copy); - trans->transaction->blocked = 0; wake_up(&root->fs_info->transaction_wait); @@ -1059,8 +1041,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, */ mutex_unlock(&root->fs_info->tree_log_mutex); - btrfs_finish_extent_commit(trans, root, pinned_copy); - kfree(pinned_copy); + btrfs_finish_extent_commit(trans, root); /* do the directory inserts of any pending snapshot creations */ finish_pending_snapshots(trans, root->fs_info); @@ -1096,8 +1077,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); - list_del_init(&root->root_list); - btrfs_drop_snapshot(root, 0); + list_del(&root->root_list); + + if (btrfs_header_backref_rev(root->node) < + BTRFS_MIXED_BACKREF_REV) + btrfs_drop_snapshot(root, 0); + else + btrfs_drop_snapshot(root, 1); } return 0; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 30c0d45c1b5..7827841b55c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log, struct walk_control *wc, u64 gen) { if (wc->pin) - btrfs_update_pinned_extents(log->fs_info->extent_root, - eb->start, eb->len, 1); + btrfs_pin_extent(log->fs_info->extent_root, + eb->start, eb->len, 0); if (btrfs_buffer_uptodate(eb, gen)) { if (wc->write) @@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, - start, extent_end, extent_end, start, &alloc_hint); + start, extent_end, extent_end, start, &alloc_hint, 1); BUG_ON(ret); if (found_type == BTRFS_FILE_EXTENT_REG || @@ -2841,7 +2841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) break; - if (parent == sb->s_root) + if (IS_ROOT(parent)) break; parent = parent->d_parent; @@ -2880,6 +2880,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_no_trans; } + if (root != BTRFS_I(inode)->root || + btrfs_root_refs(&root->root_item) == 0) { + ret = 1; + goto end_no_trans; + } + ret = check_parent_dirs_for_sync(trans, inode, parent, sb, last_committed); if (ret) @@ -2907,12 +2913,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, break; inode = parent->d_inode; + if (root != BTRFS_I(inode)->root) + break; + if (BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { ret = btrfs_log_inode(trans, root, inode, inode_only); BUG_ON(ret); } - if (parent == sb->s_root) + if (IS_ROOT(parent)) break; parent = parent->d_parent; @@ -2951,7 +2960,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) struct btrfs_key tmp_key; struct btrfs_root *log; struct btrfs_fs_info *fs_info = log_root_tree->fs_info; - u64 highest_inode; struct walk_control wc = { .process_func = process_one_buffer, .stage = 0, @@ -3010,11 +3018,6 @@ again: path); BUG_ON(ret); } - ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); - if (ret == 0) { - wc.replay_dest->highest_inode = highest_inode; - wc.replay_dest->last_inode_alloc = highest_inode; - } key.offset = found_key.offset - 1; wc.replay_dest->log_root = NULL; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cf405b0828..23e7d36ff32 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -276,7 +276,7 @@ loop_lock: * is now congested. Back off and let other work structs * run instead */ - if (pending && bdi_write_congested(bdi) && batch_run > 32 && + if (pending && bdi_write_congested(bdi) && batch_run > 8 && fs_info->fs_devices->open_devices > 1) { struct io_context *ioc; @@ -719,10 +719,9 @@ error: * called very infrequently and that a given device has a small number * of extents */ -static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, - struct btrfs_device *device, - u64 num_bytes, u64 *start, - u64 *max_avail) +int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 num_bytes, + u64 *start, u64 *max_avail) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; @@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, extent_root = root->fs_info->extent_root; em_tree = &root->fs_info->mapping_tree.map_tree; + ret = btrfs_can_relocate(extent_root, chunk_offset); + if (ret) + return -ENOSPC; + /* step one, relocate all the extents inside this chunk */ ret = btrfs_relocate_block_group(extent_root, chunk_offset); BUG_ON(ret); @@ -1749,9 +1752,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, * step two, delete the device extents and the * chunk tree entries */ - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, chunk_offset, 1); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); @@ -1780,9 +1783,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); BUG_ON(ret); - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); kfree(map); em->bdev = NULL; @@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) struct btrfs_key found_key; u64 chunk_tree = chunk_root->root_key.objectid; u64 chunk_type; + bool retried = false; + int failed = 0; int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; +again: key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) ret = btrfs_relocate_chunk(chunk_root, chunk_tree, found_key.objectid, found_key.offset); - BUG_ON(ret); + if (ret == -ENOSPC) + failed++; + else if (ret) + BUG(); } if (found_key.offset == 0) @@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) key.offset = found_key.offset - 1; } ret = 0; + if (failed && !retried) { + failed = 0; + retried = true; + goto again; + } else if (failed && retried) { + WARN_ON(1); + ret = -ENOSPC; + } error: btrfs_free_path(path); return ret; @@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root) continue; ret = btrfs_shrink_device(device, old_size - size_to_free); + if (ret == -ENOSPC) + break; BUG_ON(ret); trans = btrfs_start_transaction(dev_root, 1); @@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root) chunk = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_chunk); - key.offset = found_key.offset; /* chunk zero is special */ - if (key.offset == 0) + if (found_key.offset == 0) break; btrfs_release_path(chunk_root, path); @@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root) chunk_root->root_key.objectid, found_key.objectid, found_key.offset); - BUG_ON(ret); + BUG_ON(ret && ret != -ENOSPC); + key.offset = found_key.offset - 1; } ret = 0; error: @@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 chunk_offset; int ret; int slot; + int failed = 0; + bool retried = false; struct extent_buffer *l; struct btrfs_key key; struct btrfs_super_block *super_copy = &root->fs_info->super_copy; u64 old_total = btrfs_super_total_bytes(super_copy); + u64 old_size = device->total_bytes; u64 diff = device->total_bytes - new_size; if (new_size >= device->total_bytes) @@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) if (!path) return -ENOMEM; - trans = btrfs_start_transaction(root, 1); - if (!trans) { - ret = -ENOMEM; - goto done; - } - path->reada = 2; lock_chunks(root); @@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) if (device->writeable) device->fs_devices->total_rw_bytes -= diff; unlock_chunks(root); - btrfs_end_transaction(trans, root); +again: key.objectid = device->devid; key.offset = (u64)-1; key.type = BTRFS_DEV_EXTENT_KEY; @@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) goto done; if (ret) { ret = 0; + btrfs_release_path(root, path); break; } @@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) slot = path->slots[0]; btrfs_item_key_to_cpu(l, &key, path->slots[0]); - if (key.objectid != device->devid) + if (key.objectid != device->devid) { + btrfs_release_path(root, path); break; + } dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); length = btrfs_dev_extent_length(l, dev_extent); - if (key.offset + length <= new_size) + if (key.offset + length <= new_size) { + btrfs_release_path(root, path); break; + } chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); @@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, chunk_offset); - if (ret) + if (ret && ret != -ENOSPC) goto done; + if (ret == -ENOSPC) + failed++; + key.offset -= 1; + } + + if (failed && !retried) { + failed = 0; + retried = true; + goto again; + } else if (failed && retried) { + ret = -ENOSPC; + lock_chunks(root); + + device->total_bytes = old_size; + if (device->writeable) + device->fs_devices->total_rw_bytes += diff; + unlock_chunks(root); + goto done; } /* Shrinking succeeded, else we would be at "done". */ @@ -2294,9 +2333,9 @@ again: em->block_len = em->len; em_tree = &extent_root->fs_info->mapping_tree.map_tree; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); BUG_ON(ret); free_extent_map(em); @@ -2491,9 +2530,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) int readonly = 0; int i; - spin_lock(&map_tree->map_tree.lock); + read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); - spin_unlock(&map_tree->map_tree.lock); + read_unlock(&map_tree->map_tree.lock); if (!em) return 1; @@ -2518,11 +2557,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) struct extent_map *em; while (1) { - spin_lock(&tree->map_tree.lock); + write_lock(&tree->map_tree.lock); em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); if (em) remove_extent_mapping(&tree->map_tree, em); - spin_unlock(&tree->map_tree.lock); + write_unlock(&tree->map_tree.lock); if (!em) break; kfree(em->bdev); @@ -2540,9 +2579,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) struct extent_map_tree *em_tree = &map_tree->map_tree; int ret; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, len); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); BUG_ON(!em); BUG_ON(em->start > logical || em->start + em->len < logical); @@ -2604,9 +2643,9 @@ again: atomic_set(&multi->error, 0); } - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, *length); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em && unplug_page) return 0; @@ -2763,9 +2802,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, u64 stripe_nr; int i, j, nr = 0; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, chunk_start, 1); - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); BUG_ON(!em || em->start != chunk_start); map = (struct map_lookup *)em->bdev; @@ -3053,9 +3092,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); - spin_lock(&map_tree->map_tree.lock); + read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); - spin_unlock(&map_tree->map_tree.lock); + read_unlock(&map_tree->map_tree.lock); /* already mapped? */ if (em && em->start <= logical && em->start + em->len > logical) { @@ -3114,9 +3153,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].dev->in_fs_metadata = 1; } - spin_lock(&map_tree->map_tree.lock); + write_lock(&map_tree->map_tree.lock); ret = add_extent_mapping(&map_tree->map_tree, em); - spin_unlock(&map_tree->map_tree.lock); + write_unlock(&map_tree->map_tree.lock); BUG_ON(ret); free_extent_map(em); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5139a833f72..31b0fabdd2e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root); void btrfs_unlock_volumes(void); void btrfs_lock_volumes(void); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); +int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_device *device, u64 num_bytes, + u64 *start, u64 *max_avail); #endif diff --git a/fs/buffer.c b/fs/buffer.c index 209f7f15f5f..24afd7422ae 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - unsigned long limit; int err; - err = -EFBIG; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && size > (loff_t)limit) { - send_sig(SIGXFSZ, current, 0); - goto out; - } - if (size > inode->i_sb->s_maxbytes) + err = inode_newsize_ok(inode, size); + if (err) goto out; err = pagecache_write_begin(NULL, mapping, size, 0, diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cbc57f932d..d6db933df2b 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, { struct char_device_struct *cd; struct cdev *cdev; - char *s; int err = -ENOMEM; cd = __register_chrdev_region(major, baseminor, count, name); @@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, cdev->owner = fops->owner; cdev->ops = fops; kobject_set_name(&cdev->kobj, "%s", name); - for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) - *s = '!'; err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); if (err) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d79ce2e95c2..90c5b39f031 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -185,8 +185,7 @@ out_mount_failed: cifs_sb->mountdata = NULL; } #endif - if (cifs_sb->local_nls) - unload_nls(cifs_sb->local_nls); + unload_nls(cifs_sb->local_nls); kfree(cifs_sb); } return rc; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1f09c761931..5e2492535da 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) static int cifs_vmtruncate(struct inode *inode, loff_t offset) { - struct address_space *mapping = inode->i_mapping; - unsigned long limit; + loff_t oldsize; + int err; spin_lock(&inode->i_lock); - if (inode->i_size < offset) - goto do_expand; - /* - * truncation of in-use swapfiles is disallowed - it would cause - * subsequent swapout to scribble on the now-freed blocks. - */ - if (IS_SWAPFILE(inode)) { - spin_unlock(&inode->i_lock); - goto out_busy; - } - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); - /* - * unmap_mapping_range is called twice, first simply for efficiency - * so that truncate_inode_pages does fewer single-page unmaps. However - * after this first call, and before truncate_inode_pages finishes, - * it is possible for private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second unmap_mapping_range - * call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) { + err = inode_newsize_ok(inode, offset); + if (err) { spin_unlock(&inode->i_lock); - goto out_sig; - } - if (offset > inode->i_sb->s_maxbytes) { - spin_unlock(&inode->i_lock); - goto out_big; + goto out; } + + oldsize = inode->i_size; i_size_write(inode, offset); spin_unlock(&inode->i_lock); -out_truncate: + truncate_pagecache(inode, oldsize, offset); if (inode->i_op->truncate) inode->i_op->truncate(inode); - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -out_busy: - return -ETXTBSY; +out: + return err; } static int diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index 8ccd5ed81d9..d99860a3389 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h @@ -2,6 +2,7 @@ #define _CODA_INT_ struct dentry; +struct file; extern struct file_system_type coda_fs_type; extern unsigned long coda_timeout; diff --git a/fs/compat.c b/fs/compat.c index 3aa48834a22..d576b552e8e 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, char __user * type, unsigned long flags, void __user * data) { - unsigned long type_page; + char *kernel_type; unsigned long data_page; - unsigned long dev_page; + char *kernel_dev; char *dir_page; int retval; - retval = copy_mount_options (type, &type_page); + retval = copy_mount_string(type, &kernel_type); if (retval < 0) goto out; @@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, if (IS_ERR(dir_page)) goto out1; - retval = copy_mount_options (dev_name, &dev_page); + retval = copy_mount_string(dev_name, &kernel_dev); if (retval < 0) goto out2; - retval = copy_mount_options (data, &data_page); + retval = copy_mount_options(data, &data_page); if (retval < 0) goto out3; retval = -EINVAL; - if (type_page && data_page) { - if (!strcmp((char *)type_page, SMBFS_NAME)) { + if (kernel_type && data_page) { + if (!strcmp(kernel_type, SMBFS_NAME)) { do_smb_super_data_conv((void *)data_page); - } else if (!strcmp((char *)type_page, NCPFS_NAME)) { + } else if (!strcmp(kernel_type, NCPFS_NAME)) { do_ncp_super_data_conv((void *)data_page); - } else if (!strcmp((char *)type_page, NFS4_NAME)) { + } else if (!strcmp(kernel_type, NFS4_NAME)) { if (do_nfs4_super_data_conv((void *) data_page)) goto out4; } } - retval = do_mount((char*)dev_page, dir_page, (char*)type_page, + retval = do_mount(kernel_dev, dir_page, kernel_type, flags, (void*)data_page); out4: free_page(data_page); out3: - free_page(dev_page); + kfree(kernel_dev); out2: putname(dir_page); out1: - free_page(type_page); + kfree(kernel_type); out: return retval; } diff --git a/fs/drop_caches.c b/fs/drop_caches.c index a2edb791344..31f4b0e6d72 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -63,9 +63,9 @@ static void drop_slab(void) } int drop_caches_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_minmax(table, write, file, buffer, length, ppos); + proc_dointvec_minmax(table, write, buffer, length, ppos); if (write) { if (sysctl_drop_caches & 1) drop_pagecache(); diff --git a/fs/exec.c b/fs/exec.c index 5c833c18d0d..d49be6bc179 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include <linux/kmod.h> #include <linux/fsnotify.h> #include <linux/fs_struct.h> +#include <linux/pipe_fs_i.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -63,6 +64,7 @@ int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; +unsigned int core_pipe_limit; int suid_dumpable = 0; /* The maximal length of core_pattern is also specified in sysctl.c */ @@ -1393,18 +1395,16 @@ out_ret: return retval; } -int set_binfmt(struct linux_binfmt *new) +void set_binfmt(struct linux_binfmt *new) { - struct linux_binfmt *old = current->binfmt; + struct mm_struct *mm = current->mm; - if (new) { - if (!try_module_get(new->module)) - return -1; - } - current->binfmt = new; - if (old) - module_put(old->module); - return 0; + if (mm->binfmt) + module_put(mm->binfmt->module); + + mm->binfmt = new; + if (new) + __module_get(new->module); } EXPORT_SYMBOL(set_binfmt); @@ -1728,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm) return (ret >= 2) ? 2 : ret; } +static void wait_for_dump_helpers(struct file *file) +{ + struct pipe_inode_info *pipe; + + pipe = file->f_path.dentry->d_inode->i_pipe; + + pipe_lock(pipe); + pipe->readers++; + pipe->writers--; + + while ((pipe->readers > 1) && (!signal_pending(current))) { + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + pipe_wait(pipe); + } + + pipe->readers--; + pipe->writers++; + pipe_unlock(pipe); + +} + + void do_coredump(long signr, int exit_code, struct pt_regs *regs) { struct core_state core_state; @@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; char **helper_argv = NULL; int helper_argc = 0; - char *delimit; + int dump_count = 0; + static atomic_t core_dump_count = ATOMIC_INIT(0); audit_core_dumps(signr); - binfmt = current->binfmt; + binfmt = mm->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; @@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) lock_kernel(); ispipe = format_corename(corename, signr); unlock_kernel(); - /* - * Don't bother to check the RLIMIT_CORE value if core_pattern points - * to a pipe. Since we're not writing directly to the filesystem - * RLIMIT_CORE doesn't really apply, as no actual core file will be - * created unless the pipe reader choses to write out the core file - * at which point file size limits and permissions will be imposed - * as it does with any other process - */ + if ((!ispipe) && (core_limit < binfmt->min_coredump)) goto fail_unlock; if (ispipe) { + if (core_limit == 0) { + /* + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * core_limit of 0 here as a speacial value. Any + * non-zero limit gets set to RLIM_INFINITY below, but + * a limit of 0 skips the dump. This is a consistent + * way to catch recursive crashes. We can still crash + * if the core_pattern binary sets RLIM_CORE = !0 + * but it runs as root, and can do lots of stupid things + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + printk(KERN_WARNING + "Process %d(%s) has RLIMIT_CORE set to 0\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Aborting core\n"); + goto fail_unlock; + } + + dump_count = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < dump_count)) { + printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_dropcount; + } + helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); if (!helper_argv) { printk(KERN_WARNING "%s failed to allocate memory\n", __func__); - goto fail_unlock; - } - /* Terminate the string before the first option */ - delimit = strchr(corename, ' '); - if (delimit) - *delimit = '\0'; - delimit = strrchr(helper_argv[0], '/'); - if (delimit) - delimit++; - else - delimit = helper_argv[0]; - if (!strcmp(delimit, current->comm)) { - printk(KERN_NOTICE "Recursive core dump detected, " - "aborting\n"); - goto fail_unlock; + goto fail_dropcount; } core_limit = RLIM_INFINITY; /* SIGPIPE can happen, but it's just never processed */ - if (call_usermodehelper_pipe(corename+1, helper_argv, NULL, + if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, &file)) { printk(KERN_INFO "Core dump to %s pipe failed\n", corename); - goto fail_unlock; + goto fail_dropcount; } } else file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); if (IS_ERR(file)) - goto fail_unlock; + goto fail_dropcount; inode = file->f_path.dentry->d_inode; if (inode->i_nlink > 1) goto close_fail; /* multiple links - don't dump */ @@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (retval) current->signal->group_exit_code |= 0x80; close_fail: + if (ispipe && core_pipe_limit) + wait_for_dump_helpers(file); filp_close(file, NULL); +fail_dropcount: + if (dump_count) + atomic_dec(&core_dump_count); fail_unlock: if (helper_argv) argv_free(helper_argv); diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 5ab10c3bbeb..9f500dec3b5 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) } lock_super(sb); - lock_kernel(); sbi = sb->s_fs_info; fscb->s_nextid = cpu_to_le64(sbi->s_nextid); fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); @@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait) out: if (or) osd_end_request(or); - unlock_kernel(); unlock_super(sb); kfree(fscb); return ret; @@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb) int num_pend; struct exofs_sb_info *sbi = sb->s_fs_info; - lock_kernel(); - if (sb->s_dirt) exofs_write_super(sb); @@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb) osduld_put_device(sbi->s_dev); kfree(sb->s_fs_info); sb->s_fs_info = NULL; - - unlock_kernel(); } /* diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1c1638f873a..ade634076d0 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = { .writepages = ext2_writepages, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; const struct address_space_operations ext2_aops_xip = { @@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = { .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .error_remove_page = generic_error_remove_page, }; /* diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index cd098a7b77f..acf1b142332 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext3_writeback_aops = { @@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = { .direct_IO = ext3_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext3_journalled_aops = { @@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = { .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; void ext3_set_aops(struct inode *inode) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3a798737e30..064746fad58 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = { .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext4_writeback_aops = { @@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = { .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext4_journalled_aops = { @@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = { .invalidatepage = ext4_invalidatepage, .releasepage = ext4_releasepage, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations ext4_da_aops = { @@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = { .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; void ext4_set_aops(struct inode *inode) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8970d8c49bb..04629d1302f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb) iput(sbi->fat_inode); - if (sbi->nls_disk) { - unload_nls(sbi->nls_disk); - sbi->nls_disk = NULL; - sbi->options.codepage = fat_default_codepage; - } - if (sbi->nls_io) { - unload_nls(sbi->nls_io); - sbi->nls_io = NULL; - } - if (sbi->options.iocharset != fat_default_iocharset) { + unload_nls(sbi->nls_disk); + unload_nls(sbi->nls_io); + + if (sbi->options.iocharset != fat_default_iocharset) kfree(sbi->options.iocharset); - sbi->options.iocharset = fat_default_iocharset; - } sb->s_fs_info = NULL; kfree(sbi); diff --git a/fs/fcntl.c b/fs/fcntl.c index ae413086db9..fc089f2f7f5 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp) return pid; } +static int f_setown_ex(struct file *filp, unsigned long arg) +{ + struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex owner; + struct pid *pid; + int type; + int ret; + + ret = copy_from_user(&owner, owner_p, sizeof(owner)); + if (ret) + return ret; + + switch (owner.type) { + case F_OWNER_TID: + type = PIDTYPE_MAX; + break; + + case F_OWNER_PID: + type = PIDTYPE_PID; + break; + + case F_OWNER_GID: + type = PIDTYPE_PGID; + break; + + default: + return -EINVAL; + } + + rcu_read_lock(); + pid = find_vpid(owner.pid); + if (owner.pid && !pid) + ret = -ESRCH; + else + ret = __f_setown(filp, pid, type, 1); + rcu_read_unlock(); + + return ret; +} + +static int f_getown_ex(struct file *filp, unsigned long arg) +{ + struct f_owner_ex * __user owner_p = (void * __user)arg; + struct f_owner_ex owner; + int ret = 0; + + read_lock(&filp->f_owner.lock); + owner.pid = pid_vnr(filp->f_owner.pid); + switch (filp->f_owner.pid_type) { + case PIDTYPE_MAX: + owner.type = F_OWNER_TID; + break; + + case PIDTYPE_PID: + owner.type = F_OWNER_PID; + break; + + case PIDTYPE_PGID: + owner.type = F_OWNER_GID; + break; + + default: + WARN_ON(1); + ret = -EINVAL; + break; + } + read_unlock(&filp->f_owner.lock); + + if (!ret) + ret = copy_to_user(owner_p, &owner, sizeof(owner)); + return ret; +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SETOWN: err = f_setown(filp, arg, 1); break; + case F_GETOWN_EX: + err = f_getown_ex(filp, arg); + break; + case F_SETOWN_EX: + err = f_setown_ex(filp, arg); + break; case F_GETSIG: err = filp->f_owner.signum; break; @@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p, static void send_sigio_to_task(struct task_struct *p, struct fown_struct *fown, - int fd, - int reason) + int fd, int reason, int group) { /* * F_SETSIG can change ->signum lockless in parallel, make @@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p, else si.si_band = band_table[reason - POLL_IN]; si.si_fd = fd; - if (!group_send_sig_info(signum, &si, p)) + if (!do_send_sig_info(signum, &si, p, group)) break; /* fall-through: fall back on the old plain SIGIO signal */ case 0: - group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); + do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); } } @@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band) struct task_struct *p; enum pid_type type; struct pid *pid; + int group = 1; read_lock(&fown->lock); + type = fown->pid_type; + if (type == PIDTYPE_MAX) { + group = 0; + type = PIDTYPE_PID; + } + pid = fown->pid; if (!pid) goto out_unlock_fown; read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { - send_sigio_to_task(p, fown, fd, band); + send_sigio_to_task(p, fown, fd, band, group); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); out_unlock_fown: @@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band) } static void send_sigurg_to_task(struct task_struct *p, - struct fown_struct *fown) + struct fown_struct *fown, int group) { if (sigio_perm(p, fown, SIGURG)) - group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); + do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); } int send_sigurg(struct fown_struct *fown) @@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown) struct task_struct *p; enum pid_type type; struct pid *pid; + int group = 1; int ret = 0; read_lock(&fown->lock); + type = fown->pid_type; + if (type == PIDTYPE_MAX) { + group = 0; + type = PIDTYPE_PID; + } + pid = fown->pid; if (!pid) goto out_unlock_fown; @@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown) read_lock(&tasklist_lock); do_each_pid_task(pid, type, p) { - send_sigurg_to_task(p, fown); + send_sigurg_to_task(p, fown, group); } while_each_pid_task(pid, type, p); read_unlock(&tasklist_lock); out_unlock_fown: diff --git a/fs/file_table.c b/fs/file_table.c index 334ce39881f..8eb44042e00 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files); * Handle nr_files sysctl */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -int proc_nr_files(ctl_table *table, int write, struct file *filp, +int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); - return proc_dointvec(table, write, filp, buffer, lenp, ppos); + return proc_dointvec(table, write, buffer, lenp, ppos); } #else -int proc_nr_files(ctl_table *table, int write, struct file *filp, +int proc_nr_files(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e703654e7f4..992f6c9410b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, return 0; if (attr->ia_valid & ATTR_SIZE) { - unsigned long limit; - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } + err = inode_newsize_ok(inode, attr->ia_size); + if (err) + return err; is_truncate = true; } @@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { - if (outarg.attr.size < oldsize) - fuse_truncate(inode->i_mapping, outarg.attr.size); + truncate_pagecache(inode, oldsize, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fc9c79feb5f..01cc462ff45 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid); -void fuse_truncate(struct address_space *mapping, loff_t offset); - /** * Initialize the client device */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6da947daabd..1a822ce2b24 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -void fuse_truncate(struct address_space *mapping, loff_t offset) -{ - /* See vmtruncate() */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); -} - void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, spin_unlock(&fc->lock); if (S_ISREG(inode->i_mode) && oldsize != attr->size) { - if (attr->size < oldsize) - fuse_truncate(inode->i_mapping, attr->size); + truncate_pagecache(inode, oldsize, attr->size); invalidate_inode_pages2(inode->i_mapping); } } diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 7ebae9a4ecc..694b5d48f03 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = { .direct_IO = gfs2_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations gfs2_ordered_aops = { @@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = { .direct_IO = gfs2_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations gfs2_jdata_aops = { @@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = { .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; void gfs2_set_aops(struct inode *inode) diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index c3ac1805405..247436c10de 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -12,7 +12,6 @@ #include <linux/completion.h> #include <linux/buffer_head.h> #include <linux/namei.h> -#include <linux/utsname.h> #include <linux/mm.h> #include <linux/xattr.h> #include <linux/posix_acl.h> diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 7b6165f25fb..8bbe03c3f6d 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb) brelse(HFS_SB(sb)->mdb_bh); brelse(HFS_SB(sb)->alt_mdb_bh); - if (HFS_SB(sb)->nls_io) - unload_nls(HFS_SB(sb)->nls_io); - if (HFS_SB(sb)->nls_disk) - unload_nls(HFS_SB(sb)->nls_disk); + unload_nls(HFS_SB(sb)->nls_io); + unload_nls(HFS_SB(sb)->nls_disk); free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); kfree(HFS_SB(sb)); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c0759fe0855..43022f3d514 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb) iput(HFSPLUS_SB(sb).alloc_file); iput(HFSPLUS_SB(sb).hidden_dir); brelse(HFSPLUS_SB(sb).s_vhbh); - if (HFSPLUS_SB(sb).nls) - unload_nls(HFSPLUS_SB(sb).nls); + unload_nls(HFSPLUS_SB(sb).nls); kfree(sb->s_fs_info); sb->s_fs_info = NULL; @@ -464,8 +463,7 @@ out: cleanup: hfsplus_put_super(sb); - if (nls) - unload_nls(nls); + unload_nls(nls); return err; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index eba6d552d9c..87a1258953b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode) static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) { - struct super_block *sb = inode->i_sb; - - if (!hlist_unhashed(&inode->i_hash)) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; - if (!sb || (sb->s_flags & MS_ACTIVE)) { - spin_unlock(&inode_lock); - return; - } - inode->i_state |= I_WILL_FREE; - spin_unlock(&inode_lock); - /* - * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK - * in our backing_dev_info. - */ - write_inode_now(inode, 1); - spin_lock(&inode_lock); - inode->i_state &= ~I_WILL_FREE; - inodes_stat.nr_unused--; - hlist_del_init(&inode->i_hash); + if (generic_detach_inode(inode)) { + truncate_hugepages(inode, 0); + clear_inode(inode); + destroy_inode(inode); } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); - inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); - truncate_hugepages(inode, 0); - clear_inode(inode); - destroy_inode(inode); } static void hugetlbfs_drop_inode(struct inode *inode) @@ -936,15 +911,9 @@ static struct file_system_type hugetlbfs_fs_type = { static struct vfsmount *hugetlbfs_vfsmount; -static int can_do_hugetlb_shm(int creat_flags) +static int can_do_hugetlb_shm(void) { - if (creat_flags != HUGETLB_SHMFS_INODE) - return 0; - if (capable(CAP_IPC_LOCK)) - return 1; - if (in_group_p(sysctl_hugetlb_shm_group)) - return 1; - return 0; + return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, @@ -960,7 +929,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); - if (!can_do_hugetlb_shm(creat_flags)) { + if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { *user = current_user(); if (user_shm_lock(size, *user)) { WARN_ONCE(1, diff --git a/fs/inode.c b/fs/inode.c index 76582b06ab9..4d8e3be5597 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode) } EXPORT_SYMBOL(generic_delete_inode); -static void generic_forget_inode(struct inode *inode) +/** + * generic_detach_inode - remove inode from inode lists + * @inode: inode to remove + * + * Remove inode from inode lists, write it if it's dirty. This is just an + * internal VFS helper exported for hugetlbfs. Do not use! + * + * Returns 1 if inode should be completely destroyed. + */ +int generic_detach_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode) inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); - return; + return 0; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; @@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode) inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); + return 1; +} +EXPORT_SYMBOL_GPL(generic_detach_inode); + +static void generic_forget_inode(struct inode *inode) +{ + if (!generic_detach_inode(inode)) + return; if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); @@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry) struct inode *inode = dentry->d_inode; struct timespec now; - if (mnt_want_write(mnt)) - return; if (inode->i_flags & S_NOATIME) - goto out; + return; if (IS_NOATIME(inode)) - goto out; + return; if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) - goto out; + return; if (mnt->mnt_flags & MNT_NOATIME) - goto out; + return; if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) - goto out; + return; now = current_fs_time(inode->i_sb); if (!relatime_need_update(mnt, inode, now)) - goto out; + return; if (timespec_equal(&inode->i_atime, &now)) - goto out; + return; + + if (mnt_want_write(mnt)) + return; inode->i_atime = now; mark_inode_dirty_sync(inode); -out: mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1444,34 +1461,37 @@ void file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - int sync_it = 0; - int err; + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return; - err = mnt_want_write_file(file); - if (err) - return; - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_mtime, &now)) { - inode->i_mtime = now; - sync_it = 1; - } + if (!timespec_equal(&inode->i_mtime, &now)) + sync_it = S_MTIME; - if (!timespec_equal(&inode->i_ctime, &now)) { - inode->i_ctime = now; - sync_it = 1; - } + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it |= S_CTIME; - if (IS_I_VERSION(inode)) { - inode_inc_iversion(inode); - sync_it = 1; - } + if (IS_I_VERSION(inode)) + sync_it |= S_VERSION; + + if (!sync_it) + return; - if (sync_it) - mark_inode_dirty_sync(inode); + /* Finally allowed to write? Takes lock. */ + if (mnt_want_write_file(file)) + return; + + /* Only change inode inside the lock region */ + if (sync_it & S_VERSION) + inode_inc_iversion(inode); + if (sync_it & S_CTIME) + inode->i_ctime = now; + if (sync_it & S_MTIME) + inode->i_mtime = now; + mark_inode_dirty_sync(inode); mnt_drop_write(file->f_path.mnt); } EXPORT_SYMBOL(file_update_time); @@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) else if (S_ISSOCK(mode)) inode->i_fop = &bad_sock_fops; else - printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", - mode); + printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for" + " inode %s:%lu\n", mode, inode->i_sb->s_id, + inode->i_ino); } EXPORT_SYMBOL(init_special_inode); diff --git a/fs/internal.h b/fs/internal.h index d55ef562f0b..515175b8b72 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *); * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); +extern int copy_mount_string(const void __user *, char **); extern void free_vfsmnt(struct vfsmount *); extern struct vfsmount *alloc_vfsmnt(const char *); diff --git a/fs/ioctl.c b/fs/ioctl.c index 5612880fcbe..7b17a14396f 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags); static int fiemap_check_ranges(struct super_block *sb, u64 start, u64 len, u64 *new_len) { + u64 maxbytes = (u64) sb->s_maxbytes; + *new_len = len; if (len == 0) return -EINVAL; - if (start > sb->s_maxbytes) + if (start > maxbytes) return -EFBIG; /* * Shrink request scope to what the fs can actually handle. */ - if ((len > sb->s_maxbytes) || - (sb->s_maxbytes - len) < start) - *new_len = sb->s_maxbytes - start; + if (len > maxbytes || (maxbytes - len) < start) + *new_len = maxbytes - start; return 0; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 85f96bc651c..6b4dcd4f294 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb) #ifdef CONFIG_JOLIET lock_kernel(); - if (sbi->s_nls_iocharset) { - unload_nls(sbi->s_nls_iocharset); - sbi->s_nls_iocharset = NULL; - } + unload_nls(sbi->s_nls_iocharset); unlock_kernel(); #endif @@ -912,8 +909,7 @@ out_no_root: printk(KERN_WARNING "%s: get root inode failed\n", __func__); out_no_inode: #ifdef CONFIG_JOLIET - if (sbi->s_nls_iocharset) - unload_nls(sbi->s_nls_iocharset); + unload_nls(sbi->s_nls_iocharset); #endif goto out_freesbi; out_no_read: diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 37e6dcda8fc..2234c73fc57 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb) rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); - sbi->nls_tab = NULL; + + unload_nls(sbi->nls_tab); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); iput(sbi->direct_inode); - sbi->direct_inode = NULL; kfree(sbi); @@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, if (nls_map != (void *) -1) { /* Discard old (if remount) */ - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); + unload_nls(sbi->nls_tab); sbi->nls_tab = nls_map; } return 1; diff --git a/fs/libfs.c b/fs/libfs.c index dcec3d3ea64..219576c52d8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available) { loff_t pos = *ppos; + size_t ret; + if (pos < 0) return -EINVAL; - if (pos >= available) + if (pos >= available || !count) return 0; if (count > available - pos) count = available - pos; - if (copy_to_user(to, from + pos, count)) + ret = copy_to_user(to, from + pos, count); + if (ret == count) return -EFAULT; + count -= ret; *ppos = pos + count; return count; } @@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, if (copy_from_user(attr->set_buf, buf, size)) goto out; - ret = len; /* claim we got the whole input */ attr->set_buf[size] = '\0'; val = simple_strtol(attr->set_buf, NULL, 0); - attr->set(attr->data, val); + ret = attr->set(attr->data, val); + if (ret == 0) + ret = len; /* on success, claim we got the whole input */ out: mutex_unlock(&attr->mutex); return ret; diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 0336f2beacd..b583ab0a4cb 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/sched.h> -#include <linux/utsname.h> #include <linux/nfs.h> #include <linux/sunrpc/xdr.h> diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index e1d52865319..ad9dbbc9145 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -9,7 +9,6 @@ #include <linux/types.h> #include <linux/sched.h> -#include <linux/utsname.h> #include <linux/nfs.h> #include <linux/sunrpc/xdr.h> diff --git a/fs/namespace.c b/fs/namespace.c index 7230787d18b..bdc3cb4fd22 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags, { struct vfsmount *mnt; - if (!type || !memchr(type, 0, PAGE_SIZE)) + if (!type) return -EINVAL; /* we need capabilities... */ @@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where) return 0; } +int copy_mount_string(const void __user *data, char **where) +{ + char *tmp; + + if (!data) { + *where = NULL; + return 0; + } + + tmp = strndup_user(data, PAGE_SIZE); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + *where = tmp; + return 0; +} + /* * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to * be given to the mount() call (ie: read-only, no-dev, no-suid etc). @@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; - if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) - return -EINVAL; if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; @@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns); SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { - int retval; + int ret; + char *kernel_type; + char *kernel_dir; + char *kernel_dev; unsigned long data_page; - unsigned long type_page; - unsigned long dev_page; - char *dir_page; - retval = copy_mount_options(type, &type_page); - if (retval < 0) - return retval; + ret = copy_mount_string(type, &kernel_type); + if (ret < 0) + goto out_type; - dir_page = getname(dir_name); - retval = PTR_ERR(dir_page); - if (IS_ERR(dir_page)) - goto out1; + kernel_dir = getname(dir_name); + if (IS_ERR(kernel_dir)) { + ret = PTR_ERR(kernel_dir); + goto out_dir; + } - retval = copy_mount_options(dev_name, &dev_page); - if (retval < 0) - goto out2; + ret = copy_mount_string(dev_name, &kernel_dev); + if (ret < 0) + goto out_dev; - retval = copy_mount_options(data, &data_page); - if (retval < 0) - goto out3; + ret = copy_mount_options(data, &data_page); + if (ret < 0) + goto out_data; - retval = do_mount((char *)dev_page, dir_page, (char *)type_page, - flags, (void *)data_page); - free_page(data_page); + ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags, + (void *) data_page); -out3: - free_page(dev_page); -out2: - putname(dir_page); -out1: - free_page(type_page); - return retval; + free_page(data_page); +out_data: + kfree(kernel_dev); +out_dev: + putname(kernel_dir); +out_dir: + kfree(kernel_type); +out_type: + return ret; } /* diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index b99ce205b1b..cf98da1be23 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb) #ifdef CONFIG_NCPFS_NLS /* unload the NLS charsets */ - if (server->nls_vol) - { - unload_nls(server->nls_vol); - server->nls_vol = NULL; - } - if (server->nls_io) - { - unload_nls(server->nls_io); - server->nls_io = NULL; - } + unload_nls(server->nls_vol); + unload_nls(server->nls_io); #endif /* CONFIG_NCPFS_NLS */ if (server->info_filp) diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 53a7ed7eb9c..0d58caf4a6e 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) oldset_io = server->nls_io; server->nls_io = iocharset; - if (oldset_cp) - unload_nls(oldset_cp); - if (oldset_io) - unload_nls(oldset_io); + unload_nls(oldset_cp); + unload_nls(oldset_io); return 0; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 152025358da..63976c0ccc2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -648,8 +648,6 @@ static int nfs_start_lockd(struct nfs_server *server) .hostname = clp->cl_hostname, .address = (struct sockaddr *)&clp->cl_addr, .addrlen = clp->cl_addrlen, - .protocol = server->flags & NFS_MOUNT_TCP ? - IPPROTO_TCP : IPPROTO_UDP, .nfs_version = clp->rpc_ops->version, .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 1 : 0, @@ -660,6 +658,14 @@ static int nfs_start_lockd(struct nfs_server *server) if (server->flags & NFS_MOUNT_NONLM) return 0; + switch (clp->cl_proto) { + default: + nlm_init.protocol = IPPROTO_TCP; + break; + case XPRT_TRANSPORT_UDP: + nlm_init.protocol = IPPROTO_UDP; + } + host = nlmclnt_init(&nlm_init); if (IS_ERR(host)) return PTR_ERR(host); @@ -787,7 +793,7 @@ static int nfs_init_server(struct nfs_server *server, dprintk("--> nfs_init_server()\n"); #ifdef CONFIG_NFS_V3 - if (data->flags & NFS_MOUNT_VER3) + if (data->version == 3) cl_init.rpc_ops = &nfs_v3_clientops; #endif @@ -964,6 +970,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve target->acdirmin = source->acdirmin; target->acdirmax = source->acdirmax; target->caps = source->caps; + target->options = source->options; } /* diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5021b75d2d1..86d6b4db109 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = { .direct_IO = nfs_direct_IO, .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, + .error_remove_page = generic_error_remove_page, }; /* diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 379be678cb7..70fad69eb95 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -58,17 +58,34 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp) /* * Get the cache cookie for an NFS superblock. We have to handle * uniquification here because the cache doesn't do it for us. + * + * The default uniquifier is just an empty string, but it may be overridden + * either by the 'fsc=xxx' option to mount, or by inheriting it from the parent + * superblock across an automount point of some nature. */ -void nfs_fscache_get_super_cookie(struct super_block *sb, - struct nfs_parsed_mount_data *data) +void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, + struct nfs_clone_mount *mntdata) { struct nfs_fscache_key *key, *xkey; struct nfs_server *nfss = NFS_SB(sb); struct rb_node **p, *parent; - const char *uniq = data->fscache_uniq ?: ""; int diff, ulen; - ulen = strlen(uniq); + if (uniq) { + ulen = strlen(uniq); + } else if (mntdata) { + struct nfs_server *mnt_s = NFS_SB(mntdata->sb); + if (mnt_s->fscache_key) { + uniq = mnt_s->fscache_key->key.uniquifier; + ulen = mnt_s->fscache_key->key.uniq_len; + } + } + + if (!uniq) { + uniq = ""; + ulen = 1; + } + key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL); if (!key) return; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 6e809bb0ff0..b9c572d0679 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -74,7 +74,8 @@ extern void nfs_fscache_get_client_cookie(struct nfs_client *); extern void nfs_fscache_release_client_cookie(struct nfs_client *); extern void nfs_fscache_get_super_cookie(struct super_block *, - struct nfs_parsed_mount_data *); + const char *, + struct nfs_clone_mount *); extern void nfs_fscache_release_super_cookie(struct super_block *); extern void nfs_fscache_init_inode_cookie(struct inode *); @@ -173,7 +174,8 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} static inline void nfs_fscache_get_super_cookie( struct super_block *sb, - struct nfs_parsed_mount_data *data) + const char *uniq, + struct nfs_clone_mount *mntdata) { } static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 060022b4651..faa091865ad 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { - if (i_size_read(inode) < offset) { - unsigned long limit; - - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - spin_lock(&inode->i_lock); - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); - } else { - struct address_space *mapping = inode->i_mapping; + loff_t oldsize; + int err; - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - spin_lock(&inode->i_lock); - i_size_write(inode, offset); - spin_unlock(&inode->i_lock); + err = inode_newsize_ok(inode, offset); + if (err) + goto out; - /* - * unmap_mapping_range is called twice, first simply for - * efficiency so that truncate_inode_pages does fewer - * single-page unmaps. However after this first call, and - * before truncate_inode_pages finishes, it is possible for - * private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second - * unmap_mapping_range call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - } - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; + spin_lock(&inode->i_lock); + oldsize = inode->i_size; + i_size_write(inode, offset); + spin_unlock(&inode->i_lock); + + truncate_pagecache(inode, oldsize, offset); +out: + return err; } /** diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index c862c9340f9..5e078b222b4 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -13,7 +13,6 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ee6a13f0544..3f8881d1a05 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -7,7 +7,6 @@ */ #include <linux/mm.h> -#include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 35869a4921f..5fe5492fbd2 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -10,7 +10,6 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be6544aef41..ed7c269e251 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -36,7 +36,6 @@ */ #include <linux/mm.h> -#include <linux/utsname.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/string.h> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cfc30d362f9..83ad47cbdd8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -39,7 +39,6 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/slab.h> -#include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 7be72d90d49..ef583854d8d 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -32,7 +32,6 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/mm.h> -#include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/in.h> diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f1cc0587cfe..810770f9681 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -728,6 +728,27 @@ static void nfs_umount_begin(struct super_block *sb) unlock_kernel(); } +static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(int flags) +{ + struct nfs_parsed_mount_data *data; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data) { + data->flags = flags; + data->rsize = NFS_MAX_FILE_IO_SIZE; + data->wsize = NFS_MAX_FILE_IO_SIZE; + data->acregmin = NFS_DEF_ACREGMIN; + data->acregmax = NFS_DEF_ACREGMAX; + data->acdirmin = NFS_DEF_ACDIRMIN; + data->acdirmax = NFS_DEF_ACDIRMAX; + data->nfs_server.port = NFS_UNSPEC_PORT; + data->auth_flavors[0] = RPC_AUTH_UNIX; + data->auth_flavor_len = 1; + data->minorversion = 0; + } + return data; +} + /* * Sanity-check a server address provided by the mount command. * @@ -1430,10 +1451,13 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, int status; if (args->mount_server.version == 0) { - if (args->flags & NFS_MOUNT_VER3) - args->mount_server.version = NFS_MNT3_VERSION; - else - args->mount_server.version = NFS_MNT_VERSION; + switch (args->version) { + default: + args->mount_server.version = NFS_MNT3_VERSION; + break; + case 2: + args->mount_server.version = NFS_MNT_VERSION; + } } request.version = args->mount_server.version; @@ -1634,20 +1658,6 @@ static int nfs_validate_mount_data(void *options, if (data == NULL) goto out_no_data; - args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); - args->rsize = NFS_MAX_FILE_IO_SIZE; - args->wsize = NFS_MAX_FILE_IO_SIZE; - args->acregmin = NFS_DEF_ACREGMIN; - args->acregmax = NFS_DEF_ACREGMAX; - args->acdirmin = NFS_DEF_ACDIRMIN; - args->acdirmax = NFS_DEF_ACDIRMAX; - args->mount_server.port = NFS_UNSPEC_PORT; - args->nfs_server.port = NFS_UNSPEC_PORT; - args->nfs_server.protocol = XPRT_TRANSPORT_TCP; - args->auth_flavors[0] = RPC_AUTH_UNIX; - args->auth_flavor_len = 1; - args->minorversion = 0; - switch (data->version) { case 1: data->namlen = 0; @@ -1778,7 +1788,7 @@ static int nfs_validate_mount_data(void *options, } #ifndef CONFIG_NFS_V3 - if (args->flags & NFS_MOUNT_VER3) + if (args->version == 3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ @@ -1936,7 +1946,7 @@ static void nfs_fill_super(struct super_block *sb, if (data->bsize) sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); - if (server->flags & NFS_MOUNT_VER3) { + if (server->nfs_client->rpc_ops->version == 3) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ @@ -1960,7 +1970,7 @@ static void nfs_clone_super(struct super_block *sb, sb->s_blocksize = old_sb->s_blocksize; sb->s_maxbytes = old_sb->s_maxbytes; - if (server->flags & NFS_MOUNT_VER3) { + if (server->nfs_client->rpc_ops->version == 3) { /* The VFS shouldn't apply the umask to mode bits. We will do * so ourselves when necessary. */ @@ -2094,7 +2104,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, }; int error = -ENOMEM; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = nfs_alloc_parsed_mount_data(NFS_MOUNT_VER3 | NFS_MOUNT_TCP); mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); if (data == NULL || mntfh == NULL) goto out_free_fh; @@ -2144,7 +2154,8 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ nfs_fill_super(s, data); - nfs_fscache_get_super_cookie(s, data); + nfs_fscache_get_super_cookie( + s, data ? data->fscache_uniq : NULL, NULL); } mntroot = nfs_get_root(s, mntfh); @@ -2245,6 +2256,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs_clone_super(s, data->sb); + nfs_fscache_get_super_cookie(s, NULL, data); } mntroot = nfs_get_root(s, data->fh); @@ -2362,18 +2374,7 @@ static int nfs4_validate_mount_data(void *options, if (data == NULL) goto out_no_data; - args->rsize = NFS_MAX_FILE_IO_SIZE; - args->wsize = NFS_MAX_FILE_IO_SIZE; - args->acregmin = NFS_DEF_ACREGMIN; - args->acregmax = NFS_DEF_ACREGMAX; - args->acdirmin = NFS_DEF_ACDIRMIN; - args->acdirmax = NFS_DEF_ACDIRMAX; - args->nfs_server.port = NFS_UNSPEC_PORT; - args->auth_flavors[0] = RPC_AUTH_UNIX; - args->auth_flavor_len = 1; args->version = 4; - args->minorversion = 0; - switch (data->version) { case 1: if (data->host_addrlen > sizeof(args->nfs_server.address)) @@ -2508,7 +2509,8 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); - nfs_fscache_get_super_cookie(s, data); + nfs_fscache_get_super_cookie( + s, data ? data->fscache_uniq : NULL, NULL); } mntroot = nfs4_get_root(s, mntfh); @@ -2656,7 +2658,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, struct nfs_parsed_mount_data *data; int error = -ENOMEM; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = nfs_alloc_parsed_mount_data(0); if (data == NULL) goto out_free_data; @@ -2741,6 +2743,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs4_clone_super(s, data->sb); + nfs_fscache_get_super_cookie(s, NULL, data); } mntroot = nfs4_get_root(s, data->fh); @@ -2822,6 +2825,7 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); + nfs_fscache_get_super_cookie(s, NULL, data); } mntroot = nfs4_get_root(s, &mntfh); diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index cdfa86fa147..ba2c199592f 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -38,7 +38,6 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/utsname.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 477d37d83b3..2224b4d07bf 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset) void unload_nls(struct nls_table *nls) { - module_put(nls->owner); + if (nls) + module_put(nls->owner); } static const wchar_t charset2uni[256] = { diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index b38f944f066..cfce53cb65d 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = { .migratepage = buffer_migrate_page, /* Move a page cache page from one physical page to an other. */ + .error_remove_page = generic_error_remove_page, }; /** @@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = { .migratepage = buffer_migrate_page, /* Move a page cache page from one physical page to an other. */ + .error_remove_page = generic_error_remove_page, }; #ifdef NTFS_RW diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index abaaa1cbf8d..80b04770e8e 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -201,8 +201,7 @@ use_utf8: v, old_nls->charset); nls_map = old_nls; } else /* nls_map */ { - if (old_nls) - unload_nls(old_nls); + unload_nls(old_nls); } } else if (!strcmp(p, "utf8")) { bool val = false; @@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb) ntfs_free(vol->upcase); vol->upcase = NULL; } - if (vol->nls_map) { - unload_nls(vol->nls_map); - vol->nls_map = NULL; - } + + unload_nls(vol->nls_map); + sb->s_fs_info = NULL; kfree(vol); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 72e76062a90..deb2b132ae5 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = { .releasepage = ocfs2_releasepage, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 81eff8e5832..01cf8cc3d28 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/random.h> diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 75997b4deaf..ca96bce50e1 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/random.h> diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index c5c88124096..ca46002ec10 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -27,7 +27,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/sysctl.h> #include <linux/spinlock.h> #include <linux/debugfs.h> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 4d9e6b288dd..0334000676d 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -28,7 +28,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/delay.h> diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 83a9f2972ac..437698e9465 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/random.h> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index f8b653fcd4d..83bcaf266b3 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/random.h> diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 43e6e328056..d9fa3d22e17 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/random.h> diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 98569e86c61..52ec020ea78 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/random.h> diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 756f5b0998e..00f53b2aea7 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/random.h> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 24feb449a1d..4cc3c890a2c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -28,7 +28,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> -#include <linux/utsname.h> #include <linux/init.h> #include <linux/random.h> #include <linux/statfs.h> diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 579dd1b1110..e3421030a69 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -38,7 +38,6 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/pagemap.h> -#include <linux/utsname.h> #include <linux/namei.h> #define MLOG_MASK_PREFIX ML_NAMEI diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 171e052c07b..c7bff4f603f 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "Committed_AS: %8lu kB\n" "VmallocTotal: %8lu kB\n" "VmallocUsed: %8lu kB\n" - "VmallocChunk: %8lu kB\n", + "VmallocChunk: %8lu kB\n" +#ifdef CONFIG_MEMORY_FAILURE + "HardwareCorrupted: %8lu kB\n" +#endif + , K(i.totalram), K(i.freeram), K(i.bufferram), @@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, vmi.largest_chunk >> 10 +#ifdef CONFIG_MEMORY_FAILURE + ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) +#endif ); hugetlb_report_meminfo(m); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b1e4e9a16b..f667e8aeabd 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, /* careful: calling conventions are nasty here */ res = count; - error = table->proc_handler(table, write, filp, buf, &res, ppos); + error = table->proc_handler(table, write, buf, &res, ppos); if (!error) error = res; out: diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 0c10a0b3f14..766b1d45605 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -4,13 +4,18 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/time.h> +#include <linux/kernel_stat.h> #include <asm/cputime.h> static int uptime_proc_show(struct seq_file *m, void *v) { struct timespec uptime; struct timespec idle; - cputime_t idletime = cputime_add(init_task.utime, init_task.stime); + int i; + cputime_t idletime = cputime_zero; + + for_each_possible_cpu(i) + idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle); do_posix_clock_monotonic_gettime(&uptime); monotonic_to_bootbased(&uptime); diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 11f0c06316d..32fae4040eb 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* make various checks */ order = get_order(newsize); if (unlikely(order >= MAX_ORDER)) - goto too_big; + return -EFBIG; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && newsize > limit) - goto fsize_exceeded; - - if (newsize > inode->i_sb->s_maxbytes) - goto too_big; + ret = inode_newsize_ok(inode, newsize); + if (ret) + return ret; i_size_write(inode, newsize); @@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) return 0; - fsize_exceeded: - send_sig(SIGXFSZ, current, 0); - too_big: - return -EFBIG; - - add_error: +add_error: while (loop < npages) __free_page(pages + loop++); return ret; diff --git a/fs/read_write.c b/fs/read_write.c index 6c8c55dec2b..3ac28987f22 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); pos = *ppos; - retval = -EINVAL; - if (unlikely(pos < 0)) - goto fput_out; if (unlikely(pos + count > max)) { retval = -EOVERFLOW; if (pos >= max) diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 47f132df0c3..c117fa80d1e 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; root = romfs_iget(sb, pos); - if (!root) + if (IS_ERR(root)) goto error; sb->s_root = d_alloc_root(root); diff --git a/fs/seq_file.c b/fs/seq_file.c index 6c959275f2d..eae7d9dbf3f 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path); */ int seq_path(struct seq_file *m, struct path *path, char *esc) { - if (m->count < m->size) { - char *s = m->buf + m->count; - char *p = d_path(path, s, m->size - m->count); + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -1; + + if (size) { + char *p = d_path(path, buf, size); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return s - p; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; } } - m->count = m->size; - return -1; + seq_commit(m, res); + + return res; } EXPORT_SYMBOL(seq_path); @@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path); int seq_path_root(struct seq_file *m, struct path *path, struct path *root, char *esc) { - int err = -ENAMETOOLONG; - if (m->count < m->size) { - char *s = m->buf + m->count; + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -ENAMETOOLONG; + + if (size) { char *p; spin_lock(&dcache_lock); - p = __d_path(path, root, s, m->size - m->count); + p = __d_path(path, root, buf, size); spin_unlock(&dcache_lock); - err = PTR_ERR(p); + res = PTR_ERR(p); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return 0; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; + else + res = -ENAMETOOLONG; } } - m->count = m->size; - return err; + seq_commit(m, res); + + return res < 0 ? res : 0; } /* @@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, */ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) { - if (m->count < m->size) { - char *s = m->buf + m->count; - char *p = dentry_path(dentry, s, m->size - m->count); + char *buf; + size_t size = seq_get_buf(m, &buf); + int res = -1; + + if (size) { + char *p = dentry_path(dentry, buf, size); if (!IS_ERR(p)) { - s = mangle_path(s, p, esc); - if (s) { - p = m->buf + m->count; - m->count = s - m->buf; - return s - p; - } + char *end = mangle_path(buf, p, esc); + if (end) + res = end - buf; } } - m->count = m->size; - return -1; + seq_commit(m, res); + + return res; } int seq_bitmap(struct seq_file *m, const unsigned long *bits, diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 1402d2d54f5..1c4c8f08997 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m) static void smb_unload_nls(struct smb_sb_info *server) { - if (server->remote_nls) { - unload_nls(server->remote_nls); - server->remote_nls = NULL; - } - if (server->local_nls) { - unload_nls(server->local_nls); - server->local_nls = NULL; - } + unload_nls(server->remote_nls); + unload_nls(server->local_nls); } static void diff --git a/fs/super.c b/fs/super.c index 0e7207b9815..19eb70b374b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -465,6 +465,48 @@ rescan: } EXPORT_SYMBOL(get_super); + +/** + * get_active_super - get an active reference to the superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. Returns the superblock with an active + * reference and s_umount held exclusively or %NULL if none was found. + */ +struct super_block *get_active_super(struct block_device *bdev) +{ + struct super_block *sb; + + if (!bdev) + return NULL; + + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_bdev != bdev) + continue; + + sb->s_count++; + spin_unlock(&sb_lock); + down_write(&sb->s_umount); + if (sb->s_root) { + spin_lock(&sb_lock); + if (sb->s_count > S_BIAS) { + atomic_inc(&sb->s_active); + sb->s_count--; + spin_unlock(&sb_lock); + return sb; + } + spin_unlock(&sb_lock); + } + up_write(&sb->s_umount); + put_super(sb); + yield(); + spin_lock(&sb_lock); + } + spin_unlock(&sb_lock); + return NULL; +} struct super_block * user_get_super(dev_t dev) { @@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; int remount_rw; - + + if (sb->s_frozen != SB_UNFROZEN) + return -EBUSY; + #ifdef CONFIG_BLOCK if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) return -EACCES; #endif + if (flags & MS_RDONLY) acct_auto_close(sb); shrink_dcache_sb(sb); @@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type, * will protect the lockfs code from trying to start a snapshot * while we are mounting */ - down(&bdev->bd_mount_sem); + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = -EBUSY; + goto error_bdev; + } s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) goto error_s; @@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (error) goto out_sb; + /* + * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE + * but s_maxbytes was an unsigned long long for many releases. Throw + * this warning for a little while to try and catch filesystems that + * violate this rule. This warning should be either removed or + * converted to a BUG() in 2.6.34. + */ + WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " + "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); + mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; up_write(&mnt->mnt_sb->s_umount); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d5e5559e31d..381854461b2 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = { .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 916c0ffb608..c5bc67c4e3b 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -26,7 +26,6 @@ STATIC int xfs_stats_clear_proc_handler( ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler( int c, ret, *valp = ctl->data; __uint32_t vn_active; - ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); if (!ret && write && *valp) { printk("XFS Clearing xfsstats\n"); diff --git a/include/acpi/button.h b/include/acpi/button.h new file mode 100644 index 00000000000..97eea0e4c01 --- /dev/null +++ b/include/acpi/button.h @@ -0,0 +1,25 @@ +#ifndef ACPI_BUTTON_H +#define ACPI_BUTTON_H + +#include <linux/notifier.h> + +#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) +extern int acpi_lid_notifier_register(struct notifier_block *nb); +extern int acpi_lid_notifier_unregister(struct notifier_block *nb); +extern int acpi_lid_open(void); +#else +static inline int acpi_lid_notifier_register(struct notifier_block *nb) +{ + return 0; +} +static inline int acpi_lid_notifier_unregister(struct notifier_block *nb) +{ + return 0; +} +static inline int acpi_lid_open(void) +{ + return 1; +} +#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */ + +#endif /* ACPI_BUTTON_H */ diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 4d3e48373e7..0c3dd860392 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -73,6 +73,19 @@ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. */ #endif +#ifndef F_SETOWN_EX +#define F_SETOWN_EX 12 +#define F_GETOWN_EX 13 +#endif + +#define F_OWNER_TID 0 +#define F_OWNER_PID 1 +#define F_OWNER_GID 2 + +struct f_owner_ex { + int type; + pid_t pid; +}; /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index dd63bd38864..5ee13b2fd22 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -34,6 +34,7 @@ #define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index c840719a8c5..942d30b5aab 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -82,6 +82,7 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO int _trapno; /* TRAP # which caused the signal */ #endif + short _addr_lsb; /* LSB of the reported address */ } _sigfault; /* SIGPOLL */ @@ -112,6 +113,7 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno #endif +#define si_addr_lsb _sifields._sigfault._addr_lsb #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd @@ -192,7 +194,11 @@ typedef struct siginfo { #define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ #define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */ #define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ -#define NSIGBUS 3 +/* hardware memory error consumed on a machine check: action required */ +#define BUS_MCEERR_AR (__SI_FAULT|4) +/* hardware memory error detected in process but not consumed: action optional*/ +#define BUS_MCEERR_AO (__SI_FAULT|5) +#define NSIGBUS 5 /* * SIGTRAP si_codes diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 88bada2ebc4..510df36dd5d 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -37,9 +37,6 @@ #ifndef parent_node #define parent_node(node) ((void)(node),0) #endif -#ifndef node_to_cpumask -#define node_to_cpumask(node) ((void)node, cpu_online_map) -#endif #ifndef cpumask_of_node #define cpumask_of_node(node) ((void)node, cpu_online_mask) #endif @@ -55,18 +52,4 @@ #endif /* CONFIG_NUMA */ -/* - * returns pointer to cpumask for specified node - * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" - */ -#ifndef node_to_cpumask_ptr - -#define node_to_cpumask_ptr(v, node) \ - cpumask_t _##v = node_to_cpumask(node); \ - const cpumask_t *v = &_##v - -#define node_to_cpumask_ptr_next(v, node) \ - _##v = node_to_cpumask(node) -#endif - #endif /* _ASM_GENERIC_TOPOLOGY_H */ diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 853508499d2..3f6e545609b 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -552,6 +552,7 @@ {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x2e42, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 8e1e92583fb..7e0cb1da92e 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -185,6 +185,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_GET_APERTURE 0x23 #define DRM_I915_GEM_MMAP_GTT 0x24 #define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 +#define DRM_I915_GEM_MADVISE 0x26 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -221,6 +222,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) #define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) #define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id) +#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -667,4 +669,21 @@ struct drm_i915_get_pipe_from_crtc_id { __u32 pipe; }; +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 +#define __I915_MADV_PURGED 2 /* internal state */ + +struct drm_i915_gem_madvise { + /** Handle of the buffer to change the backing store advice */ + __u32 handle; + + /* Advice: either the buffer will be needed again in the near future, + * or wont be and could be discarded under memory pressure. + */ + __u32 madv; + + /** Whether the backing store still exists. */ + __u32 retained; +}; + #endif /* _I915_DRM_H_ */ diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97f..a1c486a88e8 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,25 +58,60 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. + * @ASYNC_TX_FENCE: specify that the next operation in the dependency + * chain uses this operation's result as an input */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), + ASYNC_TX_FENCE = (1 << 3), +}; + +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; }; #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include <asm/async_tx.h> #else #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) { - if (cb_fn) - cb_fn(cb_fn_param); + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) +{ + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; +} + +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 2046b5b8af4..aece486ac73 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); -extern int set_binfmt(struct linux_binfmt *new); +extern void set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); #endif /* __KERNEL__ */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 90bba9e6228..b62bb9294d0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -141,6 +141,38 @@ enum { CGRP_WAIT_ON_RMDIR, }; +/* which pidlist file are we talking about? */ +enum cgroup_filetype { + CGROUP_FILE_PROCS, + CGROUP_FILE_TASKS, +}; + +/* + * A pidlist is a list of pids that virtually represents the contents of one + * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, + * a pair (one each for procs, tasks) for each pid namespace that's relevant + * to the cgroup. + */ +struct cgroup_pidlist { + /* + * used to find which pidlist is wanted. doesn't change as long as + * this particular list stays in the list. + */ + struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; + /* array of xids */ + pid_t *list; + /* how many elements the above list has */ + int length; + /* how many files are using the current array */ + int use_count; + /* each of these stored in a list by its cgroup */ + struct list_head links; + /* pointer to the cgroup we belong to, for list removal purposes */ + struct cgroup *owner; + /* protects the other fields */ + struct rw_semaphore mutex; +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -179,11 +211,12 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects pids_list and cached pid arrays. */ - struct rw_semaphore pids_mutex; - - /* Linked list of struct cgroup_pids */ - struct list_head pids_list; + /* + * list of pidlists, up to two for each namespace (one for procs, one + * for tasks); created on demand. + */ + struct list_head pidlists; + struct mutex pidlist_mutex; /* For RCU-protected deletion */ struct rcu_head rcu_head; @@ -227,6 +260,9 @@ struct css_set { * during subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + + /* For RCU-protected deletion */ + struct rcu_head rcu_head; }; /* @@ -389,10 +425,11 @@ struct cgroup_subsys { struct cgroup *cgrp); int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); - int (*can_attach)(struct cgroup_subsys *ss, - struct cgroup *cgrp, struct task_struct *tsk); + int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup); void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *tsk); + struct cgroup *old_cgrp, struct task_struct *tsk, + bool threadgroup); void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); int (*populate)(struct cgroup_subsys *ss, diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 7f627775c94..ddb7a97c78c 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -27,8 +27,8 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please read Documentation/filesystems/configfs.txt before using the - * configfs interface, ESPECIALLY the parts about reference counts and + * Please read Documentation/filesystems/configfs/configfs.txt before using + * the configfs interface, ESPECIALLY the parts about reference counts and * item destructors. */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9b1d458aac6..789cf5f920c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -3,444 +3,37 @@ /* * Cpumasks provide a bitmap suitable for representing the - * set of CPU's in a system, one bit position per CPU number. - * - * The new cpumask_ ops take a "struct cpumask *"; the old ones - * use cpumask_t. - * - * See detailed comments in the file linux/bitmap.h describing the - * data type on which these cpumasks are based. - * - * For details of cpumask_scnprintf() and cpumask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. - * For details of cpulist_scnprintf() and cpulist_parse(), see - * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c - * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. - * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c. - * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c. - * - * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * Note: The alternate operations with the suffix "_nr" are used - * to limit the range of the loop to nr_cpu_ids instead of - * NR_CPUS when NR_CPUS > 64 for performance reasons. - * If NR_CPUS is <= 64 then most assembler bitmask - * operators execute faster with a constant range, so - * the operator will continue to use NR_CPUS. - * - * Another consideration is that nr_cpu_ids is initialized - * to NR_CPUS and isn't lowered until the possible cpus are - * discovered (including any disabled cpus). So early uses - * will span the entire range of NR_CPUS. - * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . - * - * The obsolescent cpumask operations are: - * - * void cpu_set(cpu, mask) turn on bit 'cpu' in mask - * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask - * void cpus_setall(mask) set all bits - * void cpus_clear(mask) clear all bits - * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask - * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask - * - * int cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] - * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] - * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 - * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2 - * void cpus_complement(dst, src) dst = ~src - * - * int cpus_equal(mask1, mask2) Does mask1 == mask2? - * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? - * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? - * int cpus_empty(mask) Is mask empty (no bits sets)? - * int cpus_full(mask) Is mask full (all bits sets)? - * int cpus_weight(mask) Hamming weigh - number of set bits - * int cpus_weight_nr(mask) Same using nr_cpu_ids instead of NR_CPUS - * - * void cpus_shift_right(dst, src, n) Shift right - * void cpus_shift_left(dst, src, n) Shift left - * - * int first_cpu(mask) Number lowest set bit, or NR_CPUS - * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS - * int next_cpu_nr(cpu, mask) Next cpu past 'cpu', or nr_cpu_ids - * - * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set - * (can be used as an lvalue) - * CPU_MASK_ALL Initializer - all bits set - * CPU_MASK_NONE Initializer - no bits set - * unsigned long *cpus_addr(mask) Array of unsigned long's in mask - * - * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t - * variables, and CPUMASK_PTR provides pointers to each field. - * - * The structure should be defined something like this: - * struct my_cpumasks { - * cpumask_t mask1; - * cpumask_t mask2; - * }; - * - * Usage is then: - * CPUMASK_ALLOC(my_cpumasks); - * CPUMASK_PTR(mask1, my_cpumasks); - * CPUMASK_PTR(mask2, my_cpumasks); - * - * --- DO NOT reference cpumask_t pointers until this check --- - * if (my_cpumasks == NULL) - * "kmalloc failed"... - * - * References are now pointers to the cpumask_t variables (*mask1, ...) - * - *if NR_CPUS > BITS_PER_LONG - * CPUMASK_ALLOC(m) Declares and allocates struct m *m = - * kmalloc(sizeof(*m), GFP_KERNEL) - * CPUMASK_FREE(m) Macro for kfree(m) - *else - * CPUMASK_ALLOC(m) Declares struct m _m, *m = &_m - * CPUMASK_FREE(m) Nop - *endif - * CPUMASK_PTR(v, m) Declares cpumask_t *v = &(m->v) - * ------------------------------------------------------------------------ - * - * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing - * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask - * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing - * int cpulist_parse(buf, map) Parse ascii string as cpulist - * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) - * void cpus_remap(dst, src, old, new) *dst = map(old, new)(src) - * void cpus_onto(dst, orig, relmap) *dst = orig relative to relmap - * void cpus_fold(dst, orig, sz) dst bits = orig bits mod sz - * - * for_each_cpu_mask(cpu, mask) for-loop cpu over mask using NR_CPUS - * for_each_cpu_mask_nr(cpu, mask) for-loop cpu over mask using nr_cpu_ids - * - * int num_online_cpus() Number of online CPUs - * int num_possible_cpus() Number of all possible CPUs - * int num_present_cpus() Number of present CPUs - * - * int cpu_online(cpu) Is some cpu online? - * int cpu_possible(cpu) Is some cpu possible? - * int cpu_present(cpu) Is some cpu present (can schedule)? - * - * int any_online_cpu(mask) First online cpu in mask - * - * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map - * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map - * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map - * - * Subtlety: - * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway) - * to generate slightly worse code. Note for example the additional - * 40 lines of assembly code compiling the "for each possible cpu" - * loops buried in the disk_stat_read() macros calls when compiling - * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple - * one-line #define for cpu_isset(), instead of wrapping an inline - * inside a macro, the way we do the other calls. + * set of CPU's in a system, one bit position per CPU number. In general, + * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ - #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; -extern cpumask_t _unused_cpumask_arg_; - -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) -static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) -{ - set_bit(cpu, dstp->bits); -} - -#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) -static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) -{ - clear_bit(cpu, dstp->bits); -} - -#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) -static inline void __cpus_setall(cpumask_t *dstp, int nbits) -{ - bitmap_fill(dstp->bits, nbits); -} - -#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) -static inline void __cpus_clear(cpumask_t *dstp, int nbits) -{ - bitmap_zero(dstp->bits, nbits); -} - -/* No static inline type checking - see Subtlety (1) above. */ -#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) - -#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) -static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) -{ - return test_and_set_bit(cpu, addr->bits); -} - -#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_andnot(dst, src1, src2) \ - __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) -static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); -} - -#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) -static inline void __cpus_complement(cpumask_t *dstp, - const cpumask_t *srcp, int nbits) -{ - bitmap_complement(dstp->bits, srcp->bits, nbits); -} - -#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) -static inline int __cpus_equal(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_equal(src1p->bits, src2p->bits, nbits); -} - -#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) -static inline int __cpus_intersects(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_intersects(src1p->bits, src2p->bits, nbits); -} - -#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) -static inline int __cpus_subset(const cpumask_t *src1p, - const cpumask_t *src2p, int nbits) -{ - return bitmap_subset(src1p->bits, src2p->bits, nbits); -} - -#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) -static inline int __cpus_empty(const cpumask_t *srcp, int nbits) -{ - return bitmap_empty(srcp->bits, nbits); -} - -#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) -static inline int __cpus_full(const cpumask_t *srcp, int nbits) -{ - return bitmap_full(srcp->bits, nbits); -} - -#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) -static inline int __cpus_weight(const cpumask_t *srcp, int nbits) -{ - return bitmap_weight(srcp->bits, nbits); -} - -#define cpus_shift_right(dst, src, n) \ - __cpus_shift_right(&(dst), &(src), (n), NR_CPUS) -static inline void __cpus_shift_right(cpumask_t *dstp, - const cpumask_t *srcp, int n, int nbits) -{ - bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); -} - -#define cpus_shift_left(dst, src, n) \ - __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) -static inline void __cpus_shift_left(cpumask_t *dstp, - const cpumask_t *srcp, int n, int nbits) -{ - bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); -} -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ /** - * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * - * @bitmap: the bitmap - * - * There are a few places where cpumask_var_t isn't appropriate and - * static cpumasks must be used (eg. very early boot), yet we don't - * expose the definition of 'struct cpumask'. - * - * This does the conversion, and can be used as a constant initializer. - */ -#define to_cpumask(bitmap) \ - ((struct cpumask *)(1 ? (bitmap) \ - : (void *)sizeof(__check_is_bitmap(bitmap)))) - -static inline int __check_is_bitmap(const unsigned long *bitmap) -{ - return 1; -} - -/* - * Special-case data structure for "single bit set only" constant CPU masks. + * cpumask_bits - get the bits in a cpumask + * @maskp: the struct cpumask * * - * We pre-generate all the 64 (or 32) possible bit positions, with enough - * padding to the left and the right, and return the constant pointer - * appropriately offset. - */ -extern const unsigned long - cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; - -static inline const struct cpumask *get_cpu_mask(unsigned int cpu) -{ - const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; - p -= cpu / BITS_PER_LONG; - return to_cpumask(p); -} - -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -/* - * In cases where we take the address of the cpumask immediately, - * gcc optimizes it out (it's a constant) and there's no huge stack - * variable created: + * You should only assume nr_cpu_ids bits of this mask are valid. This is + * a macro so it's const-correct. */ -#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) - - -#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) - -#if NR_CPUS <= BITS_PER_LONG - -#define CPU_MASK_ALL \ -(cpumask_t) { { \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} } - -#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL) - -#else - -#define CPU_MASK_ALL \ -(cpumask_t) { { \ - [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} } - -/* cpu_mask_all is in init/main.c */ -extern cpumask_t cpu_mask_all; -#define CPU_MASK_ALL_PTR (&cpu_mask_all) - -#endif - -#define CPU_MASK_NONE \ -(cpumask_t) { { \ - [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ -} } - -#define CPU_MASK_CPU0 \ -(cpumask_t) { { \ - [0] = 1UL \ -} } - -#define cpus_addr(src) ((src).bits) - -#if NR_CPUS > BITS_PER_LONG -#define CPUMASK_ALLOC(m) struct m *m = kmalloc(sizeof(*m), GFP_KERNEL) -#define CPUMASK_FREE(m) kfree(m) -#else -#define CPUMASK_ALLOC(m) struct m _m, *m = &_m -#define CPUMASK_FREE(m) -#endif -#define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) - -#define cpu_remap(oldbit, old, new) \ - __cpu_remap((oldbit), &(old), &(new), NR_CPUS) -static inline int __cpu_remap(int oldbit, - const cpumask_t *oldp, const cpumask_t *newp, int nbits) -{ - return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); -} - -#define cpus_remap(dst, src, old, new) \ - __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS) -static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, - const cpumask_t *oldp, const cpumask_t *newp, int nbits) -{ - bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); -} - -#define cpus_onto(dst, orig, relmap) \ - __cpus_onto(&(dst), &(orig), &(relmap), NR_CPUS) -static inline void __cpus_onto(cpumask_t *dstp, const cpumask_t *origp, - const cpumask_t *relmapp, int nbits) -{ - bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); -} - -#define cpus_fold(dst, orig, sz) \ - __cpus_fold(&(dst), &(orig), sz, NR_CPUS) -static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, - int sz, int nbits) -{ - bitmap_fold(dstp->bits, origp->bits, sz, nbits); -} -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ +#define cpumask_bits(maskp) ((maskp)->bits) #if NR_CPUS == 1 - #define nr_cpu_ids 1 -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#define first_cpu(src) ({ (void)(src); 0; }) -#define next_cpu(n, src) ({ (void)(src); 1; }) -#define any_online_cpu(mask) 0 -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ -#else /* NR_CPUS > 1 */ - +#else extern int nr_cpu_ids; -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -int __first_cpu(const cpumask_t *srcp); -int __next_cpu(int n, const cpumask_t *srcp); -int __any_online_cpu(const cpumask_t *mask); - -#define first_cpu(src) __first_cpu(&(src)) -#define next_cpu(n, src) __next_cpu((n), &(src)) -#define any_online_cpu(mask) __any_online_cpu(&(mask)) -#define for_each_cpu_mask(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = next_cpu((cpu), (mask)), \ - (cpu) < NR_CPUS; ) -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ #endif -#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS -#if NR_CPUS <= 64 - -#define next_cpu_nr(n, src) next_cpu(n, src) -#define cpus_weight_nr(cpumask) cpus_weight(cpumask) -#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) - -#else /* NR_CPUS > 64 */ - -int __next_cpu_nr(int n, const cpumask_t *srcp); -#define next_cpu_nr(n, src) __next_cpu_nr((n), &(src)) -#define cpus_weight_nr(cpumask) __cpus_weight(&(cpumask), nr_cpu_ids) -#define for_each_cpu_mask_nr(cpu, mask) \ - for ((cpu) = -1; \ - (cpu) = next_cpu_nr((cpu), (mask)), \ - (cpu) < nr_cpu_ids; ) - -#endif /* NR_CPUS > 64 */ -#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ +#ifdef CONFIG_CPUMASK_OFFSTACK +/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, + * not all bits may be allocated. */ +#define nr_cpumask_bits nr_cpu_ids +#else +#define nr_cpumask_bits NR_CPUS +#endif /* * The following particular system cpumasks and operations manage @@ -487,12 +80,6 @@ extern const struct cpumask *const cpu_online_mask; extern const struct cpumask *const cpu_present_mask; extern const struct cpumask *const cpu_active_mask; -/* These strip const, as traditionally they weren't const. */ -#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) -#define cpu_online_map (*(cpumask_t *)cpu_online_mask) -#define cpu_present_map (*(cpumask_t *)cpu_present_mask) -#define cpu_active_map (*(cpumask_t *)cpu_active_mask) - #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) #define num_possible_cpus() cpumask_weight(cpu_possible_mask) @@ -511,35 +98,6 @@ extern const struct cpumask *const cpu_active_mask; #define cpu_active(cpu) ((cpu) == 0) #endif -#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) - -/* These are the new versions of the cpumask operators: passed by pointer. - * The older versions will be implemented in terms of these, then deleted. */ -#define cpumask_bits(maskp) ((maskp)->bits) - -#if NR_CPUS <= BITS_PER_LONG -#define CPU_BITS_ALL \ -{ \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} - -#else /* NR_CPUS > BITS_PER_LONG */ - -#define CPU_BITS_ALL \ -{ \ - [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ - [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ -} -#endif /* NR_CPUS > BITS_PER_LONG */ - -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ -#define nr_cpumask_bits nr_cpu_ids -#else -#define nr_cpumask_bits NR_CPUS -#endif - /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) { @@ -1100,4 +658,241 @@ void set_cpu_active(unsigned int cpu, bool active); void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); + +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + +/* + * Special-case data structure for "single bit set only" constant CPU masks. + * + * We pre-generate all the 64 (or 32) possible bit positions, with enough + * padding to the left and the right, and return the constant pointer + * appropriately offset. + */ +extern const unsigned long + cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; + +static inline const struct cpumask *get_cpu_mask(unsigned int cpu) +{ + const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; + p -= cpu / BITS_PER_LONG; + return to_cpumask(p); +} + +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) + +#if NR_CPUS <= BITS_PER_LONG +#define CPU_BITS_ALL \ +{ \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +#else /* NR_CPUS > BITS_PER_LONG */ + +#define CPU_BITS_ALL \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} +#endif /* NR_CPUS > BITS_PER_LONG */ + +/* + * + * From here down, all obsolete. Use cpumask_ variants! + * + */ +#ifndef CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS +/* These strip const, as traditionally they weren't const. */ +#define cpu_possible_map (*(cpumask_t *)cpu_possible_mask) +#define cpu_online_map (*(cpumask_t *)cpu_online_mask) +#define cpu_present_map (*(cpumask_t *)cpu_present_mask) +#define cpu_active_map (*(cpumask_t *)cpu_active_mask) + +#define cpumask_of_cpu(cpu) (*get_cpu_mask(cpu)) + +#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) + +#if NR_CPUS <= BITS_PER_LONG + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#else + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#endif + +#define CPU_MASK_NONE \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} } + +#define CPU_MASK_CPU0 \ +(cpumask_t) { { \ + [0] = 1UL \ +} } + +#if NR_CPUS == 1 +#define first_cpu(src) ({ (void)(src); 0; }) +#define next_cpu(n, src) ({ (void)(src); 1; }) +#define any_online_cpu(mask) 0 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#else /* NR_CPUS > 1 */ +int __first_cpu(const cpumask_t *srcp); +int __next_cpu(int n, const cpumask_t *srcp); +int __any_online_cpu(const cpumask_t *mask); + +#define first_cpu(src) __first_cpu(&(src)) +#define next_cpu(n, src) __next_cpu((n), &(src)) +#define any_online_cpu(mask) __any_online_cpu(&(mask)) +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = next_cpu((cpu), (mask)), \ + (cpu) < NR_CPUS; ) +#endif /* SMP */ + +#if NR_CPUS <= 64 + +#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) + +#else /* NR_CPUS > 64 */ + +int __next_cpu_nr(int n, const cpumask_t *srcp); +#define for_each_cpu_mask_nr(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = __next_cpu_nr((cpu), &(mask)), \ + (cpu) < nr_cpu_ids; ) + +#endif /* NR_CPUS > 64 */ + +#define cpus_addr(src) ((src).bits) + +#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) +static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) +{ + set_bit(cpu, dstp->bits); +} + +#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) +static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) +{ + clear_bit(cpu, dstp->bits); +} + +#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) +static inline void __cpus_setall(cpumask_t *dstp, int nbits) +{ + bitmap_fill(dstp->bits, nbits); +} + +#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) +static inline void __cpus_clear(cpumask_t *dstp, int nbits) +{ + bitmap_zero(dstp->bits, nbits); +} + +/* No static inline type checking - see Subtlety (1) above. */ +#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) + +#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask)) +static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) +{ + return test_and_set_bit(cpu, addr->bits); +} + +#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) +static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_andnot(dst, src1, src2) \ + __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) +static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) +static inline int __cpus_equal(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_equal(src1p->bits, src2p->bits, nbits); +} + +#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) +static inline int __cpus_intersects(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_intersects(src1p->bits, src2p->bits, nbits); +} + +#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) +static inline int __cpus_subset(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_subset(src1p->bits, src2p->bits, nbits); +} + +#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) +static inline int __cpus_empty(const cpumask_t *srcp, int nbits) +{ + return bitmap_empty(srcp->bits, nbits); +} + +#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) +static inline int __cpus_weight(const cpumask_t *srcp, int nbits) +{ + return bitmap_weight(srcp->bits, nbits); +} + +#define cpus_shift_left(dst, src, n) \ + __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) +static inline void __cpus_shift_left(cpumask_t *dstp, + const cpumask_t *srcp, int n, int nbits) +{ + bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); +} +#endif /* !CONFIG_DISABLE_OBSOLETE_CPUMASK_FUNCTIONS */ + #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/cred.h b/include/linux/cred.h index fb371601a3b..4e3387a89cb 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -176,23 +176,7 @@ extern void __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); -static inline bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers)) - return true; -#ifdef CONFIG_SECURITY_SELINUX - if (selinux_is_enabled()) { - if ((unsigned long) cred->security < PAGE_SIZE) - return true; - if ((*(u32 *)cred->security & 0xffffff00) == - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)) - return true; - } -#endif - return false; -} +extern bool creds_are_invalid(const struct cred *cred); static inline void __validate_creds(const struct cred *cred, const char *file, unsigned line) diff --git a/include/linux/dca.h b/include/linux/dca.h index 9c20c7e87d0..d27a7a05718 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h @@ -20,6 +20,9 @@ */ #ifndef DCA_H #define DCA_H + +#include <linux/pci.h> + /* DCA Provider API */ /* DCA Notifier Interface */ @@ -36,6 +39,12 @@ struct dca_provider { int id; }; +struct dca_domain { + struct list_head node; + struct list_head dca_providers; + struct pci_bus *pci_rc; +}; + struct dca_ops { int (*add_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *); @@ -47,7 +56,7 @@ struct dca_ops { struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); void free_dca_provider(struct dca_provider *dca); int register_dca_provider(struct dca_provider *dca, struct device *dev); -void unregister_dca_provider(struct dca_provider *dca); +void unregister_dca_provider(struct dca_provider *dca, struct device *dev); static inline void *dca_priv(struct dca_provider *dca) { diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index eb5c2ba2f81..fc1b930f246 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -9,7 +9,7 @@ * 2 as published by the Free Software Foundation. * * debugfs is for people to use instead of /proc or /sys. - * See Documentation/DocBook/kernel-api for more details. + * See Documentation/DocBook/filesystems for more details. */ #ifndef _DEBUGFS_H_ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ffefba81c81..2b9f2ac7ed6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -48,19 +48,20 @@ enum dma_status { /** * enum dma_transaction_type - DMA transaction types/indexes + * + * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is + * automatically set as dma devices are registered. */ enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, - DMA_DUAL_XOR, - DMA_PQ_UPDATE, - DMA_ZERO_SUM, - DMA_PQ_ZERO_SUM, + DMA_PQ, + DMA_XOR_VAL, + DMA_PQ_VAL, DMA_MEMSET, - DMA_MEMCPY_CRC32C, DMA_INTERRUPT, DMA_PRIVATE, + DMA_ASYNC_TX, DMA_SLAVE, }; @@ -70,18 +71,25 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single * (if not set, do the source dma-unmapping as page) * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single * (if not set, do the destination dma-unmapping as page) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources + * @DMA_PREP_FENCE - tell the driver that subsequent operations depend + * on the result of this operation */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), @@ -90,9 +98,32 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), + DMA_PREP_PQ_DISABLE_P = (1 << 6), + DMA_PREP_PQ_DISABLE_Q = (1 << 7), + DMA_PREP_CONTINUE = (1 << 8), + DMA_PREP_FENCE = (1 << 9), }; /** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + +/** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h */ @@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param); * @flags: flags to augment operation preparation, control completion, and * communicate status * @phys: physical address of the descriptor - * @tx_list: driver common field for operations that require multiple - * descriptors * @chan: target channel for this operation * @tx_submit: set the prepared descriptor(s) to be executed by the engine * @callback: routine to call after this operation is complete @@ -195,7 +224,6 @@ struct dma_async_tx_descriptor { dma_cookie_t cookie; enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */ dma_addr_t phys; - struct list_head tx_list; struct dma_chan *chan; dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); dma_async_tx_callback callback; @@ -213,6 +241,11 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability + * @copy_align: alignment shift for memcpy operations + * @xor_align: alignment shift for xor operations + * @pq_align: alignment shift for pq operations + * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -220,7 +253,9 @@ struct dma_async_tx_descriptor { * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation - * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -235,7 +270,13 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + u8 copy_align; + u8 xor_align; + u8 pq_align; + u8 fill_align; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -249,9 +290,17 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -270,6 +319,96 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +{ + size_t mask; + + if (!align) + return true; + mask = (1 << align) - 1; + if (mask & (off1 | off2 | len)) + return false; + return true; +} + +static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->copy_align, off1, off2, len); +} + +static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->xor_align, off1, off2, len); +} + +static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->pq_align, off1, off2, len); +} + +static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, + size_t off2, size_t len) +{ + return dmaengine_check_align(dev->fill_align, off1, off2, len); +} + +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE @@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void) #ifdef CONFIG_ASYNC_TX_DMA #define async_dmaengine_get() dmaengine_get() #define async_dmaengine_put() dmaengine_put() +#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH +#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX) +#else #define async_dma_find_channel(type) dma_find_channel(type) +#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */ #else static inline void async_dmaengine_get(void) { @@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type) { return NULL; } -#endif +#endif /* CONFIG_ASYNC_TX_DMA */ dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, void *src, size_t len); diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b09..2adaa2529f1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -595,6 +595,7 @@ struct address_space_operations { int (*launder_page) (struct page *); int (*is_partially_uptodate) (struct page *, read_descriptor_t *, unsigned long); + int (*error_remove_page)(struct address_space *, struct page *); }; /* @@ -640,7 +641,6 @@ struct block_device { struct super_block * bd_super; int bd_openers; struct mutex bd_mutex; /* open/close mutex */ - struct semaphore bd_mount_sem; struct list_head bd_inodes; void * bd_holder; int bd_holders; @@ -1315,7 +1315,7 @@ struct super_block { unsigned long s_blocksize; unsigned char s_blocksize_bits; unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ + loff_t s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; const struct dquot_operations *dq_op; @@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); extern void generic_delete_inode(struct inode *inode); extern void generic_drop_inode(struct inode *inode); +extern int generic_detach_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), @@ -2334,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); +extern struct super_block *get_active_super(struct block_device *bdev); extern struct super_block *user_get_super(dev_t); extern void drop_super(struct super_block *sb); @@ -2381,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *, #define buffer_migrate_page NULL #endif -extern int inode_change_ok(struct inode *, struct iattr *); +extern int inode_change_ok(const struct inode *, struct iattr *); +extern int inode_newsize_ok(const struct inode *, loff_t offset); extern int __must_check inode_setattr(struct inode *, struct iattr *); extern void file_update_time(struct file *file); @@ -2467,7 +2470,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_files(struct ctl_table *table, int write, struct file *filp, +int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3c0924a18da..cd3d2abaf30 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -19,7 +19,7 @@ extern int ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); @@ -94,7 +94,7 @@ static inline void ftrace_start(void) { } extern int stack_tracer_enabled; int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif diff --git a/include/linux/futex.h b/include/linux/futex.h index 34956c8fdeb..8ec17997d94 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,6 @@ #include <linux/compiler.h> #include <linux/types.h> -struct inode; -struct mm_struct; -struct task_struct; -union ktime; - /* Second argument to futex syscall */ @@ -129,6 +124,11 @@ struct robust_list_head { #define FUTEX_BITSET_MATCH_ANY 0xffffffff #ifdef __KERNEL__ +struct inode; +struct mm_struct; +struct task_struct; +union ktime; + long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, u32 __user *uaddr2, u32 val2, u32 val3); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 176e7ee73ef..11ab19ac6b3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) } void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h new file mode 100644 index 00000000000..fc5db826b48 --- /dev/null +++ b/include/linux/i2c/adp5588.h @@ -0,0 +1,92 @@ +/* + * Analog Devices ADP5588 I/O Expander and QWERTY Keypad Controller + * + * Copyright 2009 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef _ADP5588_H +#define _ADP5588_H + +#define DEV_ID 0x00 /* Device ID */ +#define CFG 0x01 /* Configuration Register1 */ +#define INT_STAT 0x02 /* Interrupt Status Register */ +#define KEY_LCK_EC_STAT 0x03 /* Key Lock and Event Counter Register */ +#define Key_EVENTA 0x04 /* Key Event Register A */ +#define Key_EVENTB 0x05 /* Key Event Register B */ +#define Key_EVENTC 0x06 /* Key Event Register C */ +#define Key_EVENTD 0x07 /* Key Event Register D */ +#define Key_EVENTE 0x08 /* Key Event Register E */ +#define Key_EVENTF 0x09 /* Key Event Register F */ +#define Key_EVENTG 0x0A /* Key Event Register G */ +#define Key_EVENTH 0x0B /* Key Event Register H */ +#define Key_EVENTI 0x0C /* Key Event Register I */ +#define Key_EVENTJ 0x0D /* Key Event Register J */ +#define KP_LCK_TMR 0x0E /* Keypad Lock1 to Lock2 Timer */ +#define UNLOCK1 0x0F /* Unlock Key1 */ +#define UNLOCK2 0x10 /* Unlock Key2 */ +#define GPIO_INT_STAT1 0x11 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT2 0x12 /* GPIO Interrupt Status */ +#define GPIO_INT_STAT3 0x13 /* GPIO Interrupt Status */ +#define GPIO_DAT_STAT1 0x14 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT2 0x15 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_STAT3 0x16 /* GPIO Data Status, Read twice to clear */ +#define GPIO_DAT_OUT1 0x17 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT2 0x18 /* GPIO DATA OUT */ +#define GPIO_DAT_OUT3 0x19 /* GPIO DATA OUT */ +#define GPIO_INT_EN1 0x1A /* GPIO Interrupt Enable */ +#define GPIO_INT_EN2 0x1B /* GPIO Interrupt Enable */ +#define GPIO_INT_EN3 0x1C /* GPIO Interrupt Enable */ +#define KP_GPIO1 0x1D /* Keypad or GPIO Selection */ +#define KP_GPIO2 0x1E /* Keypad or GPIO Selection */ +#define KP_GPIO3 0x1F /* Keypad or GPIO Selection */ +#define GPI_EM1 0x20 /* GPI Event Mode 1 */ +#define GPI_EM2 0x21 /* GPI Event Mode 2 */ +#define GPI_EM3 0x22 /* GPI Event Mode 3 */ +#define GPIO_DIR1 0x23 /* GPIO Data Direction */ +#define GPIO_DIR2 0x24 /* GPIO Data Direction */ +#define GPIO_DIR3 0x25 /* GPIO Data Direction */ +#define GPIO_INT_LVL1 0x26 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL2 0x27 /* GPIO Edge/Level Detect */ +#define GPIO_INT_LVL3 0x28 /* GPIO Edge/Level Detect */ +#define Debounce_DIS1 0x29 /* Debounce Disable */ +#define Debounce_DIS2 0x2A /* Debounce Disable */ +#define Debounce_DIS3 0x2B /* Debounce Disable */ +#define GPIO_PULL1 0x2C /* GPIO Pull Disable */ +#define GPIO_PULL2 0x2D /* GPIO Pull Disable */ +#define GPIO_PULL3 0x2E /* GPIO Pull Disable */ +#define CMP_CFG_STAT 0x30 /* Comparator Configuration and Status Register */ +#define CMP_CONFG_SENS1 0x31 /* Sensor1 Comparator Configuration Register */ +#define CMP_CONFG_SENS2 0x32 /* L2 Light Sensor Reference Level, Output Falling for Sensor 1 */ +#define CMP1_LVL2_TRIP 0x33 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 1 */ +#define CMP1_LVL2_HYS 0x34 /* L3 Light Sensor Reference Level, Output Falling For Sensor 1 */ +#define CMP1_LVL3_TRIP 0x35 /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 1 */ +#define CMP1_LVL3_HYS 0x36 /* Sensor 2 Comparator Configuration Register */ +#define CMP2_LVL2_TRIP 0x37 /* L2 Light Sensor Reference Level, Output Falling for Sensor 2 */ +#define CMP2_LVL2_HYS 0x38 /* L2 Light Sensor Hysteresis (Active when Output Rising) for Sensor 2 */ +#define CMP2_LVL3_TRIP 0x39 /* L3 Light Sensor Reference Level, Output Falling For Sensor 2 */ +#define CMP2_LVL3_HYS 0x3A /* L3 Light Sensor Hysteresis (Active when Output Rising) For Sensor 2 */ +#define CMP1_ADC_DAT_R1 0x3B /* Comparator 1 ADC data Register1 */ +#define CMP1_ADC_DAT_R2 0x3C /* Comparator 1 ADC data Register2 */ +#define CMP2_ADC_DAT_R1 0x3D /* Comparator 2 ADC data Register1 */ +#define CMP2_ADC_DAT_R2 0x3E /* Comparator 2 ADC data Register2 */ + +#define ADP5588_DEVICE_ID_MASK 0xF + +/* Put one of these structures in i2c_board_info platform_data */ + +#define ADP5588_KEYMAPSIZE 80 + +struct adp5588_kpad_platform_data { + int rows; /* Number of rows */ + int cols; /* Number of columns */ + const unsigned short *keymap; /* Pointer to keymap */ + unsigned short keymapsize; /* Keymap size */ + unsigned repeat:1; /* Enable key repeat */ + unsigned en_keylock:1; /* Enable Key Lock feature */ + unsigned short unlock_key1; /* Unlock Key 1 */ + unsigned short unlock_key2; /* Unlock Key 2 */ +}; + +#endif diff --git a/include/linux/i2c/mcs5000_ts.h b/include/linux/i2c/mcs5000_ts.h new file mode 100644 index 00000000000..5a117b5ca15 --- /dev/null +++ b/include/linux/i2c/mcs5000_ts.h @@ -0,0 +1,24 @@ +/* + * mcs5000_ts.h + * + * Copyright (C) 2009 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim <jy0922.shim@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MCS5000_TS_H +#define __LINUX_MCS5000_TS_H + +/* platform data for the MELFAS MCS-5000 touchscreen driver */ +struct mcs5000_ts_platform_data { + void (*cfg_pin)(void); + int x_size; + int y_size; +}; + +#endif /* __LINUX_MCS5000_TS_H */ diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 7907a72403e..60c3360ef6a 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -7,6 +7,7 @@ * the Free Software Foundation. */ +#include <linux/types.h> /* * Standard commands. @@ -30,6 +31,35 @@ #define I8042_CMD_MUX_PFX 0x0090 #define I8042_CMD_MUX_SEND 0x1090 +struct serio; + +#if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE) + +void i8042_lock_chip(void); +void i8042_unlock_chip(void); int i8042_command(unsigned char *param, int command); +bool i8042_check_port_owner(const struct serio *); + +#else + +void i8042_lock_chip(void) +{ +} + +void i8042_unlock_chip(void) +{ +} + +int i8042_command(unsigned char *param, int command) +{ + return -ENOSYS; +} + +bool i8042_check_port_owner(const struct serio *serio) +{ + return false; +} + +#endif #endif diff --git a/include/linux/input.h b/include/linux/input.h index 8b3bc3e0d14..0ccfc30cd40 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1123,7 +1123,7 @@ struct input_dev { struct mutex mutex; unsigned int users; - int going_away; + bool going_away; struct device dev; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8e9e151f811..b78cf819495 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -84,7 +84,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * struct irqaction - per interrupt action descriptor * @handler: interrupt handler function * @flags: flags (see IRQF_* above) - * @mask: no comment as it is useless and about to be removed * @name: name of the device * @dev_id: cookie to identify the device * @next: pointer to the next irqaction for shared interrupts @@ -97,7 +96,6 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); struct irqaction { irq_handler_t handler; unsigned long flags; - cpumask_t mask; const char *name; void *dev_id; struct irqaction *next; diff --git a/include/linux/libps2.h b/include/linux/libps2.h index fcf5fbe6a50..79603a6c356 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -44,6 +44,8 @@ struct ps2dev { void ps2_init(struct ps2dev *ps2dev, struct serio *serio); int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout); void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout); +void ps2_begin_command(struct ps2dev *ps2dev); +void ps2_end_command(struct ps2dev *ps2dev); int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data); diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 691f59171c6..5126cceb6ae 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -57,6 +57,7 @@ #ifdef __ASSEMBLY__ +#ifndef LINKER_SCRIPT #define ALIGN __ALIGN #define ALIGN_STR __ALIGN_STR @@ -66,6 +67,7 @@ ALIGN; \ name: #endif +#endif /* LINKER_SCRIPT */ #ifndef WEAK #define WEAK(name) \ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e46a0734ab6..bf9213b2db8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void) extern bool mem_cgroup_oom_called(struct task_struct *task); void mem_cgroup_update_mapped_file_stat(struct page *page, int val); +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, + int zid); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct mem_cgroup; @@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page, { } +static inline +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, int zid) +{ + return 0; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144..df08551cb0a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ +#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON) /* * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. @@ -791,8 +792,14 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } -extern int vmtruncate(struct inode * inode, loff_t offset); -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern int vmtruncate(struct inode *inode, loff_t offset); +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); + +int truncate_inode_page(struct address_space *mapping, struct page *page); +int generic_error_remove_page(struct address_space *mapping, struct page *page); + +int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, @@ -1279,7 +1286,7 @@ int in_gate_area_no_task(unsigned long addr); #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ -int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, +int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); @@ -1308,5 +1315,12 @@ void vmemmap_populate_print_last(void); extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size); + +extern void memory_failure(unsigned long pfn, int trapno); +extern int __memory_failure(unsigned long pfn, int trapno, int ref); +extern int sysctl_memory_failure_early_kill; +extern int sysctl_memory_failure_recovery; +extern atomic_long_t mce_bad_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0042090a4d7..21d6aa45206 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,6 +240,8 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ + struct linux_binfmt *binfmt; + cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ @@ -259,11 +261,10 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ - - /* aio bits */ +#ifdef CONFIG_AIO spinlock_t ioctx_lock; struct hlist_head ioctx_list; - +#endif #ifdef CONFIG_MM_OWNER /* * "owner" points to a task that is regarded as the canonical diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 652ef01be58..6f7561730d8 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -struct file; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, +int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int numa_zonelist_order_handler(struct ctl_table *, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ diff --git a/include/linux/module.h b/include/linux/module.h index 1c755b2f937..482efc865ac 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -128,7 +128,10 @@ extern struct module __this_module; */ #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) -/* Author, ideally of form NAME[, NAME]*[ and NAME] */ +/* + * Author(s), use "Name <email>" or just "Name", for multiple + * authors use multiple MODULE_AUTHOR() statements/lines. + */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ @@ -308,10 +311,14 @@ struct module #endif #ifdef CONFIG_KALLSYMS - /* We keep the symbol and string tables for kallsyms. */ - Elf_Sym *symtab; - unsigned int num_symtab; - char *strtab; + /* + * We keep the symbol and string tables for kallsyms. + * The core_* fields below are temporary, loader-only (they + * could really be discarded after module init). + */ + Elf_Sym *symtab, *core_symtab; + unsigned int num_symtab, core_num_syms; + char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 080f6ba9e73..ab5d3126831 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -187,6 +187,7 @@ extern struct sock *netlink_kernel_create(struct net *net, extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); +extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 13de789f0a5..6b202b17395 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -51,6 +51,9 @@ * PG_buddy is set to indicate that the page is free and in the buddy system * (see mm/page_alloc.c). * + * PG_hwpoison indicates that a page got corrupted in hardware and contains + * data with incorrect ECC bits that triggered a machine check. Accessing is + * not safe since it may cause another machine check. Don't touch! */ /* @@ -102,6 +105,9 @@ enum pageflags { #ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif +#ifdef CONFIG_MEMORY_FAILURE + PG_hwpoison, /* hardware poisoned page. Don't touch */ +#endif __NR_PAGEFLAGS, /* Filesystems */ @@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached) PAGEFLAG_FALSE(Uncached) #endif +#ifdef CONFIG_MEMORY_FAILURE +PAGEFLAG(HWPoison, hwpoison) +TESTSETFLAG(HWPoison, hwpoison) +#define __PG_HWPOISON (1UL << PG_hwpoison) +#else +PAGEFLAG_FALSE(HWPoison) +#define __PG_HWPOISON 0 +#endif + static inline int PageUptodate(struct page *page) { int ret = test_bit(PG_uptodate, &(page)->flags); @@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED) + 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) /* * Flags checked when a page is prepped for return by the page allocator. diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index ada779f2417..4b938d4f3ac 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -38,6 +38,7 @@ enum { PCG_LOCK, /* page cgroup is locked */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. */ + PCG_ACCT_LRU, /* page has been accounted for */ }; #define TESTPCGFLAG(uname, lname) \ @@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ { clear_bit(PCG_##lname, &pc->flags); } +#define TESTCLEARPCGFLAG(uname, lname) \ +static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ + { return test_and_clear_bit(PCG_##lname, &pc->flags); } + /* Cache flag is set only once (at allocation) */ TESTPCGFLAG(Cache, CACHE) +CLEARPCGFLAG(Cache, CACHE) +SETPCGFLAG(Cache, CACHE) TESTPCGFLAG(Used, USED) CLEARPCGFLAG(Used, USED) +SETPCGFLAG(Used, USED) + +SETPCGFLAG(AcctLRU, ACCT_LRU) +CLEARPCGFLAG(AcctLRU, ACCT_LRU) +TESTPCGFLAG(AcctLRU, ACCT_LRU) +TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) static inline int page_cgroup_nid(struct page_cgroup *pc) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7803565aa87..da1fda8623e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2527,6 +2527,16 @@ #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c +#define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 +#define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 1ef5a078183..e5126cff9b2 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -38,6 +38,7 @@ #define PNPIPE_IFINDEX 2 #define PNADDR_ANY 0 +#define PNADDR_BROADCAST 0xFC #define PNPORT_RESOURCE_ROUTING 0 /* Values for PNPIPE_ENCAP option */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 07bff666e65..931150566ad 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -88,4 +88,6 @@ #define PR_TASK_PERF_EVENTS_DISABLE 31 #define PR_TASK_PERF_EVENTS_ENABLE 32 +#define PR_MCE_KILL 33 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/relay.h b/include/linux/relay.h index 953fc055e87..14a86bc7102 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -140,7 +140,7 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * - * See Documentation/filesystems/relayfs.txt for more info. + * See Documentation/filesystems/relay.txt for more info. */ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 511f42fc681..731af71cddc 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -35,6 +35,10 @@ struct res_counter { */ unsigned long long limit; /* + * the limit that usage can be exceed + */ + unsigned long long soft_limit; + /* * the number of unsuccessful attempts to consume the resource */ unsigned long long failcnt; @@ -87,6 +91,7 @@ enum { RES_MAX_USAGE, RES_LIMIT, RES_FAILCNT, + RES_SOFT_LIMIT, }; /* @@ -109,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val, struct res_counter **limit_fail_at); + unsigned long val, struct res_counter **limit_fail_at, + struct res_counter **soft_limit_at); /* * uncharge - tell that some portion of the resource is released @@ -122,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter, */ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); -void res_counter_uncharge(struct res_counter *counter, unsigned long val); +void res_counter_uncharge(struct res_counter *counter, unsigned long val, + bool *was_soft_limit_excess); static inline bool res_counter_limit_check_locked(struct res_counter *cnt) { @@ -132,6 +139,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt) return false; } +static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt) +{ + if (cnt->usage < cnt->soft_limit) + return true; + + return false; +} + +/** + * Get the difference between the usage and the soft limit + * @cnt: The counter + * + * Returns 0 if usage is less than or equal to soft limit + * The difference between usage and soft limit, otherwise. + */ +static inline unsigned long long +res_counter_soft_limit_excess(struct res_counter *cnt) +{ + unsigned long long excess; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + if (cnt->usage <= cnt->soft_limit) + excess = 0; + else + excess = cnt->usage - cnt->soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return excess; +} + /* * Helper function to detect if the cgroup is within it's limit or * not. It's currently called from cgroup_rss_prepare() @@ -147,6 +184,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) return ret; } +static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + ret = res_counter_soft_limit_check_locked(cnt); + spin_unlock_irqrestore(&cnt->lock, flags); + return ret; +} + static inline void res_counter_reset_max(struct res_counter *cnt) { unsigned long flags; @@ -180,4 +228,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt, return ret; } +static inline int +res_counter_set_soft_limit(struct res_counter *cnt, + unsigned long long soft_limit) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->soft_limit = soft_limit; + spin_unlock_irqrestore(&cnt->lock, flags); + return 0; +} + #endif diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 477841d29fc..cb0ba703260 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page) */ int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt, unsigned long *vm_flags); -int try_to_unmap(struct page *, int ignore_refs); +enum ttu_flags { + TTU_UNMAP = 0, /* unmap mode */ + TTU_MIGRATION = 1, /* migration mode */ + TTU_MUNLOCK = 2, /* munlock mode */ + TTU_ACTION_MASK = 0xff, + + TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ + TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ + TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ +}; +#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) + +int try_to_unmap(struct page *, enum ttu_flags flags); /* * Called from mm/filemap_xip.c to unmap empty zero page @@ -108,6 +120,13 @@ int page_mkclean(struct page *); */ int try_to_munlock(struct page *); +/* + * Called by memory-failure.c to kill processes. + */ +struct anon_vma *page_lock_anon_vma(struct page *page); +void page_unlock_anon_vma(struct anon_vma *anon_vma); +int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index cbf2a3b4628..75e6e60bf58 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -309,7 +309,7 @@ extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; @@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -1271,7 +1271,6 @@ struct task_struct { struct mm_struct *mm, *active_mm; /* task state */ - struct linux_binfmt *binfmt; int exit_state; int exit_code, exit_signal; int pdeath_signal; /* The signal sent when the parent dies */ @@ -1735,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ @@ -1754,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ @@ -1817,10 +1818,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, return 0; } #endif + +#ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { return set_cpus_allowed_ptr(p, &new_mask); } +#endif /* * Architectures can set this to 1 if they have specified @@ -1903,7 +1907,7 @@ extern unsigned int sysctl_sched_time_avg; extern unsigned int sysctl_timer_migration; int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos); #endif #ifdef CONFIG_SCHED_DEBUG @@ -1921,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period; extern int sysctl_sched_rt_runtime; int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int sysctl_sched_compat_yield; @@ -2056,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int kill_proc_info(int, struct siginfo *, pid_t); extern int do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); extern int send_sig(int, struct task_struct *, int); @@ -2333,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p) return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } -extern int __fatal_signal_pending(struct task_struct *p); +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} static inline int fatal_signal_pending(struct task_struct *p) { diff --git a/include/linux/security.h b/include/linux/security.h index d050b66ab9e..239e40d0450 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) return PAGE_ALIGN(mmap_min_addr); return hint; } -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +extern int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #ifdef CONFIG_SECURITY diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 0c6a86b7959..8366d8f12e5 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -35,6 +35,44 @@ struct seq_operations { #define SEQ_SKIP 1 +/** + * seq_get_buf - get buffer to write arbitrary data to + * @m: the seq_file handle + * @bufp: the beginning of the buffer is stored here + * + * Return the number of bytes available in the buffer, or zero if + * there's no space. + */ +static inline size_t seq_get_buf(struct seq_file *m, char **bufp) +{ + BUG_ON(m->count > m->size); + if (m->count < m->size) + *bufp = m->buf + m->count; + else + *bufp = NULL; + + return m->size - m->count; +} + +/** + * seq_commit - commit data to the buffer + * @m: the seq_file handle + * @num: the number of bytes to commit + * + * Commit @num bytes of data written to a buffer previously acquired + * by seq_buf_get. To signal an error condition, or that the data + * didn't fit in the available space, pass a negative @num value. + */ +static inline void seq_commit(struct seq_file *m, int num) +{ + if (num < 0) { + m->count = m->size; + } else { + BUG_ON(m->count + num > m->size); + m->count += num; + } +} + char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); diff --git a/include/linux/signal.h b/include/linux/signal.h index c7552836bd9..ab9272cc270 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig) } extern int next_signal(struct sigpending *pending, sigset_t *mask); +extern int do_send_sig_info(int sig, struct siginfo *info, + struct task_struct *p, bool group); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, diff --git a/include/linux/smp.h b/include/linux/smp.h index 9e3d8af0920..39c64bae776 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -73,15 +73,6 @@ int smp_call_function(void(*func)(void *info), void *info, int wait); void smp_call_function_many(const struct cpumask *mask, void (*func)(void *info), void *info, bool wait); -/* Deprecated: Use smp_call_function_many which takes a pointer to the mask. */ -static inline int -smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, - int wait) -{ - smp_call_function_many(&mask, func, info, wait); - return 0; -} - void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); @@ -144,8 +135,6 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -#define smp_call_function_mask(mask, func, info, wait) \ - (up_smp_call_function(func, info)) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) static inline void init_call_single_data(void) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 7da466ba4b0..f5cc0898bc5 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -11,6 +11,7 @@ #include <linux/uio.h> #include <asm/byteorder.h> +#include <asm/unaligned.h> #include <linux/scatterlist.h> /* @@ -117,14 +118,14 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le static inline __be32 * xdr_encode_hyper(__be32 *p, __u64 val) { - *(__be64 *)p = cpu_to_be64(val); + put_unaligned_be64(val, p); return p + 2; } static inline __be32 * xdr_decode_hyper(__be32 *p, __u64 *valp) { - *valp = be64_to_cpup((__be64 *)p); + *valp = get_unaligned_be64(p); return p + 2; } diff --git a/include/linux/swap.h b/include/linux/swap.h index 6c990e658f4..4ec90019c1a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -34,16 +34,38 @@ static inline int current_is_kswapd(void) * the type/offset into the pte as 5/27 as well. */ #define MAX_SWAPFILES_SHIFT 5 -#ifndef CONFIG_MIGRATION -#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) + +/* + * Use some of the swap files numbers for other purposes. This + * is a convenient way to hook into the VM to trigger special + * actions on faults. + */ + +/* + * NUMA node memory migration support + */ +#ifdef CONFIG_MIGRATION +#define SWP_MIGRATION_NUM 2 +#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) +#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) #else -/* Use last two entries for page migration swap entries */ -#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2) -#define SWP_MIGRATION_READ MAX_SWAPFILES -#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1) +#define SWP_MIGRATION_NUM 0 #endif /* + * Handling of hardware poisoned pages with memory corruption. + */ +#ifdef CONFIG_MEMORY_FAILURE +#define SWP_HWPOISON_NUM 1 +#define SWP_HWPOISON MAX_SWAPFILES +#else +#define SWP_HWPOISON_NUM 0 +#endif + +#define MAX_SWAPFILES \ + ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + +/* * Magic header for a swap area. The first part of the union is * what the swap magic looks like for the old (limited to 128MB) * swap area format, the second part of the union adds - in the @@ -217,6 +239,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, unsigned int swappiness); +extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + unsigned int swappiness, + struct zone *zone, + int nid); extern int __isolate_lru_page(struct page *page, int mode, int file); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; @@ -240,7 +267,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma); extern void scan_mapping_unevictable_pages(struct address_space *); extern unsigned long scan_unevictable_pages; -extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, +extern int scan_unevictable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int scan_unevictable_register_node(struct node *node); extern void scan_unevictable_unregister_node(struct node *node); diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6ec39ab27b4..cd42e30b7c6 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) +static inline int non_swap_entry(swp_entry_t entry) +{ + return swp_type(entry) >= MAX_SWAPFILES; +} +#else +static inline int non_swap_entry(swp_entry_t entry) +{ + return 0; +} +#endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e76d3b22a46..1e4743ee683 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -29,7 +29,6 @@ #include <linux/types.h> #include <linux/compiler.h> -struct file; struct completion; #define CTL_MAXNAME 10 /* how many path components do we allow in a @@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, +typedef int proc_handler (struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -extern int proc_dostring(struct ctl_table *, int, struct file *, +extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, struct file *, +extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, +extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *, +extern int proc_doulongvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - struct file *, void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, diff --git a/include/linux/time.h b/include/linux/time.h index 56787c09334..fe04e5ef6a5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); +/* + * Similar to the struct tm in userspace <time.h>, but it needs to be here so + * that the kernel source is self contained. + */ +struct tm { + /* + * the number of seconds after the minute, normally in the range + * 0 to 59, but can be up to 60 to allow for leap seconds + */ + int tm_sec; + /* the number of minutes after the hour, in the range 0 to 59*/ + int tm_min; + /* the number of hours past midnight, in the range 0 to 23 */ + int tm_hour; + /* the day of the month, in the range 1 to 31 */ + int tm_mday; + /* the number of months since January, in the range 0 to 11 */ + int tm_mon; + /* the number of years since 1900 */ + long tm_year; + /* the number of days since Sunday, in the range 0 to 6 */ + int tm_wday; + /* the number of days since January 1, in the range 0 to 365 */ + int tm_yday; +}; + +void time_to_tm(time_t totalsecs, int offset, struct tm *result); + /** * timespec_to_ns - Convert timespec to nanoseconds * @ts: pointer to the timespec variable to be converted diff --git a/include/linux/topology.h b/include/linux/topology.h index 809b26c0709..fc0bf3edeb6 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -211,12 +211,6 @@ int arch_update_cpu_topology(void); #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif -#ifndef topology_thread_siblings -#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu) -#endif -#ifndef topology_core_siblings -#define topology_core_siblings(cpu) cpumask_of_cpu(cpu) -#endif #ifndef topology_thread_cpumask #define topology_thread_cpumask(cpu) cpumask_of(cpu) #endif diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 17ba82efa48..1eb44a924e5 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -1,7 +1,7 @@ /* * Tracing hooks * - * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task, /** * tracehook_notify_jctl - report about job control stop/continue - * @notify: nonzero if this is the last thread in the group to stop + * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED * @why: %CLD_STOPPED or %CLD_CONTINUED * * This is called when we might call do_notify_parent_cldstop(). - * It's called when about to stop for job control; we are already in - * %TASK_STOPPED state, about to call schedule(). It's also called when - * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made. * - * Return nonzero to generate a %SIGCHLD with @why, which is - * normal if @notify is nonzero. + * @notify is zero if we would not ordinarily send a %SIGCHLD, + * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. * - * Called with no locks held. + * @why is %CLD_STOPPED when about to stop for job control; + * we are already in %TASK_STOPPED state, about to call schedule(). + * It might also be that we have just exited (check %PF_EXITING), + * but need to report that a group-wide stop is complete. + * + * @why is %CLD_CONTINUED when waking up after job control stop and + * ready to make a delayed @notify report. + * + * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. + * + * Called with the siglock held. */ static inline int tracehook_notify_jctl(int notify, int why) { - return notify || (current->ptrace & PT_PTRACED); + return notify ?: (current->ptrace & PT_PTRACED) ? why : 0; +} + +/** + * tracehook_finish_jctl - report about return from job control stop + * + * This is called by do_signal_stop() after wakeup. + */ +static inline void tracehook_finish_jctl(void) +{ } #define DEATH_REAP -1 diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63a3f7a8058..660a9de96f8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -4,7 +4,7 @@ /* * Kernel Tracepoint API. * - * See Documentation/tracepoint.txt. + * See Documentation/trace/tracepoints.txt. * * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> * diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h index 46dd12c5709..9356b24223a 100644 --- a/include/linux/unaligned/be_byteshift.h +++ b/include/linux/unaligned/be_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H #define _LINUX_UNALIGNED_BE_BYTESHIFT_H -#include <linux/kernel.h> +#include <linux/types.h> static inline u16 __get_unaligned_be16(const u8 *p) { diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h index 59777e951ba..be376fb79b6 100644 --- a/include/linux/unaligned/le_byteshift.h +++ b/include/linux/unaligned/le_byteshift.h @@ -1,7 +1,7 @@ #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H #define _LINUX_UNALIGNED_LE_BYTESHIFT_H -#include <linux/kernel.h> +#include <linux/types.h> static inline u16 __get_unaligned_le16(const u8 *p) { diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index bb69e256cd1..f8147305205 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -89,6 +89,7 @@ struct driver_info { #define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */ #define FLAG_WLAN 0x0080 /* use "wlan%d" names */ #define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */ +#define FLAG_SEND_ZLP 0x0200 /* hw requires ZLPs are sent */ /* init device ... can sleep, or cause probe() failure */ diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 3656b300de3..69f39974c04 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -36,7 +36,6 @@ struct new_utsname { #include <linux/kref.h> #include <linux/nsproxy.h> #include <linux/err.h> -#include <asm/atomic.h> struct uts_namespace { struct kref kref; diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 923f9040ea2..2dfaa293ae8 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -1,5 +1,6 @@ /* - * vgaarb.c + * The VGA aribiter manages VGA space routing and VGA resource decode to + * allow multiple VGA devices to be used in a system in a safe way. * * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com> diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 75cf58666ff..66ebddcff66 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -110,21 +110,20 @@ extern int laptop_mode; extern unsigned long determine_dirtyable_memory(void); extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); extern int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; -struct file; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, +int dirty_writeback_centisecs_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index b77c1478c99..a7fb54808a2 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -38,6 +38,8 @@ * @P9_DEBUG_SLABS: memory management tracing * @P9_DEBUG_FCALL: verbose dump of protocol messages * @P9_DEBUG_FID: fid allocation/deallocation tracking + * @P9_DEBUG_PKT: packet marshalling/unmarshalling + * @P9_DEBUG_FSC: FS-cache tracing * * These flags are passed at mount time to turn on various levels of * verbosity and tracing which will be output to the system logs. @@ -54,6 +56,7 @@ enum p9_debug_flags { P9_DEBUG_FCALL = (1<<8), P9_DEBUG_FID = (1<<9), P9_DEBUG_PKT = (1<<10), + P9_DEBUG_FSC = (1<<11), }; #ifdef CONFIG_NET_9P_DEBUG diff --git a/include/net/ip.h b/include/net/ip.h index 72c36926c26..5b26a0bd178 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, * fed into the routing cache should use these handlers. */ int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file* filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos); int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, diff --git a/include/net/ipip.h b/include/net/ipip.h index 5d3036fa151..76e3ea6e2fe 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -12,7 +12,6 @@ struct ip_tunnel struct ip_tunnel *next; struct net_device *dev; - int recursion; /* Depth of hard_start_xmit recursion */ int err_count; /* Number of arrived ICMP errors */ unsigned long err_time; /* Time when the last ICMP error arrived */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 1459ed3e269..f76f22d0572 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -55,7 +55,6 @@ enum { #include <net/neighbour.h> struct ctl_table; -struct file; struct inet6_dev; struct net_device; struct net_proto_family; @@ -139,7 +138,6 @@ extern int igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, - struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/init/Kconfig b/init/Kconfig index 0aa6579504c..c7bac39d6c6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1006,14 +1006,6 @@ config SLUB_DEBUG SLUB sysfs support. /sys/slab will not exist and there will be no support for cache validation etc. -config STRIP_ASM_SYMS - bool "Strip assembler-generated symbols during link" - default n - help - Strip internal assembler-generated symbols during a link (symbols - that look like '.Lxxx') so they don't pollute the output of - get_wchan() and suchlike. - config COMPAT_BRK bool "Disable heap randomization" default y diff --git a/init/main.c b/init/main.c index 6107223124e..7449819a480 100644 --- a/init/main.c +++ b/init/main.c @@ -18,7 +18,6 @@ #include <linux/string.h> #include <linux/ctype.h> #include <linux/delay.h> -#include <linux/utsname.h> #include <linux/ioport.h> #include <linux/init.h> #include <linux/smp_lock.h> @@ -360,11 +359,6 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } #else -#if NR_CPUS > BITS_PER_LONG -cpumask_t cpu_mask_all __read_mostly = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_mask_all); -#endif - /* Setup number of possible processor ids */ int nr_cpu_ids __read_mostly = NR_CPUS; EXPORT_SYMBOL(nr_cpu_ids); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 40eab7314ae..7d3704750ef 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table) } #ifdef CONFIG_PROC_SYSCTL -static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, +static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); + return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } static int proc_ipc_callback_dointvec(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; size_t lenp_bef = *lenp; @@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, } static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - return proc_doulongvec_minmax(&ipc_table, write, filp, buffer, + return proc_doulongvec_minmax(&ipc_table, write, buffer, lenp, ppos); } @@ -95,7 +95,7 @@ static void ipc_auto_callback(int val) } static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; size_t lenp_bef = *lenp; @@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, ipc_table.data = get_ipc(table); oldval = *((int *)(ipc_table.data)); - rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) { int newval = *((int *)(ipc_table.data)); diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 24ae46dfe45..8a058711fc1 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table) return which; } -static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, +static int proc_mq_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); mq_table.data = get_mq(table); - return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos); + return proc_dointvec(&mq_table, write, buffer, lenp, ppos); } static int proc_mq_dointvec_minmax(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); mq_table.data = get_mq(table); - return proc_dointvec_minmax(&mq_table, write, filp, buffer, + return proc_dointvec_minmax(&mq_table, write, buffer, lenp, ppos); } #else diff --git a/kernel/Makefile b/kernel/Makefile index 187c89b4783..b8d4cd8ac0b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -58,7 +58,6 @@ obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o -obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o diff --git a/kernel/audit.c b/kernel/audit.c index defc2e6f1e3..5feed232be9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -855,18 +855,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } case AUDIT_SIGNAL_INFO: - err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); - if (err) - return err; + len = 0; + if (audit_sig_sid) { + err = security_secid_to_secctx(audit_sig_sid, &ctx, &len); + if (err) + return err; + } sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL); if (!sig_data) { - security_release_secctx(ctx, len); + if (audit_sig_sid) + security_release_secctx(ctx, len); return -ENOMEM; } sig_data->uid = audit_sig_uid; sig_data->pid = audit_sig_pid; - memcpy(sig_data->ctx, ctx, len); - security_release_secctx(ctx, len); + if (audit_sig_sid) { + memcpy(sig_data->ctx, ctx, len); + security_release_secctx(ctx, len); + } audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 0e96dbc60ea..cc7e87936cb 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -45,8 +45,8 @@ struct audit_watch { atomic_t count; /* reference count */ - char *path; /* insertion path */ dev_t dev; /* associated superblock device */ + char *path; /* insertion path */ unsigned long ino; /* associated inode number */ struct audit_parent *parent; /* associated parent */ struct list_head wlist; /* entry in parent->watches list */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 68d3c6a0ecd..267e484f019 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -168,12 +168,12 @@ struct audit_context { int in_syscall; /* 1 if task is in a syscall */ enum audit_state state, current_state; unsigned int serial; /* serial number for record */ - struct timespec ctime; /* time of syscall entry */ int major; /* syscall number */ + struct timespec ctime; /* time of syscall entry */ unsigned long argv[4]; /* syscall arguments */ - int return_valid; /* return code is valid */ long return_code;/* syscall return code */ u64 prio; + int return_valid; /* return code is valid */ int name_count; struct audit_names names[AUDIT_NAMES]; char * filterkey; /* key for rule that triggered record */ @@ -198,8 +198,8 @@ struct audit_context { char target_comm[TASK_COMM_LEN]; struct audit_tree_refs *trees, *first_trees; - int tree_count; struct list_head killed_trees; + int tree_count; int type; union { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cd83d9933b6..7ccba4bc5e3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -23,6 +23,7 @@ */ #include <linux/cgroup.h> +#include <linux/ctype.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/kernel.h> @@ -48,6 +49,8 @@ #include <linux/namei.h> #include <linux/smp_lock.h> #include <linux/pid_namespace.h> +#include <linux/idr.h> +#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ #include <asm/atomic.h> @@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = { #include <linux/cgroup_subsys.h> }; +#define MAX_CGROUP_ROOT_NAMELEN 64 + /* * A cgroupfs_root represents the root of a cgroup hierarchy, * and may be associated with a superblock to form an active @@ -74,6 +79,9 @@ struct cgroupfs_root { */ unsigned long subsys_bits; + /* Unique id for this hierarchy. */ + int hierarchy_id; + /* The bitmask of subsystems currently attached to this hierarchy */ unsigned long actual_subsys_bits; @@ -94,6 +102,9 @@ struct cgroupfs_root { /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; + + /* The name for this hierarchy - may be empty */ + char name[MAX_CGROUP_ROOT_NAMELEN]; }; /* @@ -141,6 +152,10 @@ struct css_id { static LIST_HEAD(roots); static int root_count; +static DEFINE_IDA(hierarchy_ida); +static int next_hierarchy_id; +static DEFINE_SPINLOCK(hierarchy_id_lock); + /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ #define dummytop (&rootnode.top_cgroup) @@ -201,6 +216,7 @@ struct cg_cgroup_link { * cgroup, anchored on cgroup->css_sets */ struct list_head cgrp_link_list; + struct cgroup *cgrp; /* * List running through cg_cgroup_links pointing at a * single css_set object, anchored on css_set->cg_links @@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); static DEFINE_RWLOCK(css_set_lock); static int css_set_count; -/* hash table for cgroup groups. This improves the performance to - * find an existing css_set */ +/* + * hash table for cgroup groups. This improves the performance to find + * an existing css_set. This hash doesn't (currently) take into + * account cgroups in empty hierarchies. + */ #define CSS_SET_HASH_BITS 7 #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; @@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) return &css_set_table[index]; } +static void free_css_set_rcu(struct rcu_head *obj) +{ + struct css_set *cg = container_of(obj, struct css_set, rcu_head); + kfree(cg); +} + /* We don't maintain the lists running through each css_set to its * task until after the first call to cgroup_iter_start(). This * reduces the fork()/exit() overhead for people who have cgroups * compiled into their kernel but not actually in use */ static int use_task_css_set_links __read_mostly; -/* When we create or destroy a css_set, the operation simply - * takes/releases a reference count on all the cgroups referenced - * by subsystems in this css_set. This can end up multiple-counting - * some cgroups, but that's OK - the ref-count is just a - * busy/not-busy indicator; ensuring that we only count each cgroup - * once would require taking a global lock to ensure that no - * subsystems moved between hierarchies while we were doing so. - * - * Possible TODO: decide at boot time based on the number of - * registered subsystems and the number of CPUs or NUMA nodes whether - * it's better for performance to ref-count every subsystem, or to - * take a global lock and only add one ref count to each hierarchy. - */ - -/* - * unlink a css_set from the list and free it - */ -static void unlink_css_set(struct css_set *cg) +static void __put_css_set(struct css_set *cg, int taskexit) { struct cg_cgroup_link *link; struct cg_cgroup_link *saved_link; - - hlist_del(&cg->hlist); - css_set_count--; - - list_for_each_entry_safe(link, saved_link, &cg->cg_links, - cg_link_list) { - list_del(&link->cg_link_list); - list_del(&link->cgrp_link_list); - kfree(link); - } -} - -static void __put_css_set(struct css_set *cg, int taskexit) -{ - int i; /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an @@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit) write_unlock(&css_set_lock); return; } - unlink_css_set(cg); - write_unlock(&css_set_lock); - rcu_read_lock(); - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); + /* This css_set is dead. unlink it and release cgroup refcounts */ + hlist_del(&cg->hlist); + css_set_count--; + + list_for_each_entry_safe(link, saved_link, &cg->cg_links, + cg_link_list) { + struct cgroup *cgrp = link->cgrp; + list_del(&link->cg_link_list); + list_del(&link->cgrp_link_list); if (atomic_dec_and_test(&cgrp->count) && notify_on_release(cgrp)) { if (taskexit) set_bit(CGRP_RELEASABLE, &cgrp->flags); check_for_release(cgrp); } + + kfree(link); } - rcu_read_unlock(); - kfree(cg); + + write_unlock(&css_set_lock); + call_rcu(&cg->rcu_head, free_css_set_rcu); } /* @@ -338,6 +338,78 @@ static inline void put_css_set_taskexit(struct css_set *cg) } /* + * compare_css_sets - helper function for find_existing_css_set(). + * @cg: candidate css_set being tested + * @old_cg: existing css_set for a task + * @new_cgrp: cgroup that's being entered by the task + * @template: desired set of css pointers in css_set (pre-calculated) + * + * Returns true if "cg" matches "old_cg" except for the hierarchy + * which "new_cgrp" belongs to, for which it should match "new_cgrp". + */ +static bool compare_css_sets(struct css_set *cg, + struct css_set *old_cg, + struct cgroup *new_cgrp, + struct cgroup_subsys_state *template[]) +{ + struct list_head *l1, *l2; + + if (memcmp(template, cg->subsys, sizeof(cg->subsys))) { + /* Not all subsystems matched */ + return false; + } + + /* + * Compare cgroup pointers in order to distinguish between + * different cgroups in heirarchies with no subsystems. We + * could get by with just this check alone (and skip the + * memcmp above) but on most setups the memcmp check will + * avoid the need for this more expensive check on almost all + * candidates. + */ + + l1 = &cg->cg_links; + l2 = &old_cg->cg_links; + while (1) { + struct cg_cgroup_link *cgl1, *cgl2; + struct cgroup *cg1, *cg2; + + l1 = l1->next; + l2 = l2->next; + /* See if we reached the end - both lists are equal length. */ + if (l1 == &cg->cg_links) { + BUG_ON(l2 != &old_cg->cg_links); + break; + } else { + BUG_ON(l2 == &old_cg->cg_links); + } + /* Locate the cgroups associated with these links. */ + cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list); + cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list); + cg1 = cgl1->cgrp; + cg2 = cgl2->cgrp; + /* Hierarchies should be linked in the same order. */ + BUG_ON(cg1->root != cg2->root); + + /* + * If this hierarchy is the hierarchy of the cgroup + * that's changing, then we need to check that this + * css_set points to the new cgroup; if it's any other + * hierarchy, then this css_set should point to the + * same cgroup as the old css_set. + */ + if (cg1->root == new_cgrp->root) { + if (cg1 != new_cgrp) + return false; + } else { + if (cg1 != cg2) + return false; + } + } + return true; +} + +/* * find_existing_css_set() is a helper for * find_css_set(), and checks to see whether an existing * css_set is suitable. @@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set( hhead = css_set_hash(template); hlist_for_each_entry(cg, node, hhead, hlist) { - if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { - /* All subsystems matched */ - return cg; - } + if (!compare_css_sets(cg, oldcg, cgrp, template)) + continue; + + /* This css_set matches what we need */ + return cg; } /* No existing cgroup group matched */ @@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links, link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, cgrp_link_list); link->cg = cg; + link->cgrp = cgrp; + atomic_inc(&cgrp->count); list_move(&link->cgrp_link_list, &cgrp->css_sets); - list_add(&link->cg_link_list, &cg->cg_links); + /* + * Always add links to the tail of the list so that the list + * is sorted by order of hierarchy creation + */ + list_add_tail(&link->cg_link_list, &cg->cg_links); } /* @@ -451,11 +530,11 @@ static struct css_set *find_css_set( { struct css_set *res; struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; - int i; struct list_head tmp_cg_links; struct hlist_head *hhead; + struct cg_cgroup_link *link; /* First see if we already have a cgroup group that matches * the desired set */ @@ -489,20 +568,12 @@ static struct css_set *find_css_set( write_lock(&css_set_lock); /* Add reference counts and links from the new css_set. */ - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { - struct cgroup *cgrp = res->subsys[i]->cgroup; - struct cgroup_subsys *ss = subsys[i]; - atomic_inc(&cgrp->count); - /* - * We want to add a link once per cgroup, so we - * only do it for the first subsystem in each - * hierarchy - */ - if (ss->root->subsys_list.next == &ss->sibling) - link_css_set(&tmp_cg_links, res, cgrp); + list_for_each_entry(link, &oldcg->cg_links, cg_link_list) { + struct cgroup *c = link->cgrp; + if (c->root == cgrp->root) + c = cgrp; + link_css_set(&tmp_cg_links, res, c); } - if (list_empty(&rootnode.subsys_list)) - link_css_set(&tmp_cg_links, res, dummytop); BUG_ON(!list_empty(&tmp_cg_links)); @@ -518,6 +589,41 @@ static struct css_set *find_css_set( } /* + * Return the cgroup for "task" from the given hierarchy. Must be + * called with cgroup_mutex held. + */ +static struct cgroup *task_cgroup_from_root(struct task_struct *task, + struct cgroupfs_root *root) +{ + struct css_set *css; + struct cgroup *res = NULL; + + BUG_ON(!mutex_is_locked(&cgroup_mutex)); + read_lock(&css_set_lock); + /* + * No need to lock the task - since we hold cgroup_mutex the + * task can't change groups, so the only thing that can happen + * is that it exits and its css is set back to init_css_set. + */ + css = task->cgroups; + if (css == &init_css_set) { + res = &root->top_cgroup; + } else { + struct cg_cgroup_link *link; + list_for_each_entry(link, &css->cg_links, cg_link_list) { + struct cgroup *c = link->cgrp; + if (c->root == root) { + res = c; + break; + } + } + } + read_unlock(&css_set_lock); + BUG_ON(!res); + return res; +} + +/* * There is one global cgroup mutex. We also require taking * task_lock() when dereferencing a task's cgroup subsys pointers. * See "The task_lock() exception", at the end of this comment. @@ -677,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) */ deactivate_super(cgrp->root->sb); + /* + * if we're getting rid of the cgroup, refcount should ensure + * that there are no pidlists left. + */ + BUG_ON(!list_empty(&cgrp->pidlists)); + call_rcu(&cgrp->rcu_head, free_cgroup_rcu); } iput(inode); @@ -841,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noprefix"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); + if (strlen(root->name)) + seq_printf(seq, ",name=%s", root->name); mutex_unlock(&cgroup_mutex); return 0; } @@ -849,6 +963,12 @@ struct cgroup_sb_opts { unsigned long subsys_bits; unsigned long flags; char *release_agent; + char *name; + /* User explicitly requested empty subsystem */ + bool none; + + struct cgroupfs_root *new_root; + }; /* Convert a hierarchy specifier into a bitmask of subsystems and @@ -863,9 +983,7 @@ static int parse_cgroupfs_options(char *data, mask = ~(1UL << cpuset_subsys_id); #endif - opts->subsys_bits = 0; - opts->flags = 0; - opts->release_agent = NULL; + memset(opts, 0, sizeof(*opts)); while ((token = strsep(&o, ",")) != NULL) { if (!*token) @@ -879,17 +997,42 @@ static int parse_cgroupfs_options(char *data, if (!ss->disabled) opts->subsys_bits |= 1ul << i; } + } else if (!strcmp(token, "none")) { + /* Explicitly have no subsystems */ + opts->none = true; } else if (!strcmp(token, "noprefix")) { set_bit(ROOT_NOPREFIX, &opts->flags); } else if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ if (opts->release_agent) return -EINVAL; - opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL); + opts->release_agent = + kstrndup(token + 14, PATH_MAX, GFP_KERNEL); if (!opts->release_agent) return -ENOMEM; - strncpy(opts->release_agent, token + 14, PATH_MAX - 1); - opts->release_agent[PATH_MAX - 1] = 0; + } else if (!strncmp(token, "name=", 5)) { + int i; + const char *name = token + 5; + /* Can't specify an empty name */ + if (!strlen(name)) + return -EINVAL; + /* Must match [\w.-]+ */ + for (i = 0; i < strlen(name); i++) { + char c = name[i]; + if (isalnum(c)) + continue; + if ((c == '.') || (c == '-') || (c == '_')) + continue; + return -EINVAL; + } + /* Specifying two names is forbidden */ + if (opts->name) + return -EINVAL; + opts->name = kstrndup(name, + MAX_CGROUP_ROOT_NAMELEN, + GFP_KERNEL); + if (!opts->name) + return -ENOMEM; } else { struct cgroup_subsys *ss; int i; @@ -906,6 +1049,8 @@ static int parse_cgroupfs_options(char *data, } } + /* Consistency checks */ + /* * Option noprefix was introduced just for backward compatibility * with the old cpuset, so we allow noprefix only if mounting just @@ -915,8 +1060,16 @@ static int parse_cgroupfs_options(char *data, (opts->subsys_bits & mask)) return -EINVAL; - /* We can't have an empty hierarchy */ - if (!opts->subsys_bits) + + /* Can't specify "none" and some subsystems */ + if (opts->subsys_bits && opts->none) + return -EINVAL; + + /* + * We either have to specify by name or by subsystems. (So all + * empty hierarchies must have a name). + */ + if (!opts->subsys_bits && !opts->name) return -EINVAL; return 0; @@ -944,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) goto out_unlock; } + /* Don't allow name to change at remount */ + if (opts.name && strcmp(opts.name, root->name)) { + ret = -EINVAL; + goto out_unlock; + } + ret = rebind_subsystems(root, opts.subsys_bits); if (ret) goto out_unlock; @@ -955,6 +1114,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) strcpy(root->release_agent_path, opts.release_agent); out_unlock: kfree(opts.release_agent); + kfree(opts.name); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); unlock_kernel(); @@ -974,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->css_sets); INIT_LIST_HEAD(&cgrp->release_list); - INIT_LIST_HEAD(&cgrp->pids_list); - init_rwsem(&cgrp->pids_mutex); + INIT_LIST_HEAD(&cgrp->pidlists); + mutex_init(&cgrp->pidlist_mutex); } + static void init_cgroup_root(struct cgroupfs_root *root) { struct cgroup *cgrp = &root->top_cgroup; @@ -988,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root) init_cgroup_housekeeping(cgrp); } +static bool init_root_id(struct cgroupfs_root *root) +{ + int ret = 0; + + do { + if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL)) + return false; + spin_lock(&hierarchy_id_lock); + /* Try to allocate the next unused ID */ + ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id, + &root->hierarchy_id); + if (ret == -ENOSPC) + /* Try again starting from 0 */ + ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id); + if (!ret) { + next_hierarchy_id = root->hierarchy_id + 1; + } else if (ret != -EAGAIN) { + /* Can only get here if the 31-bit IDR is full ... */ + BUG_ON(ret); + } + spin_unlock(&hierarchy_id_lock); + } while (ret); + return true; +} + static int cgroup_test_super(struct super_block *sb, void *data) { - struct cgroupfs_root *new = data; + struct cgroup_sb_opts *opts = data; struct cgroupfs_root *root = sb->s_fs_info; - /* First check subsystems */ - if (new->subsys_bits != root->subsys_bits) - return 0; + /* If we asked for a name then it must match */ + if (opts->name && strcmp(opts->name, root->name)) + return 0; - /* Next check flags */ - if (new->flags != root->flags) + /* + * If we asked for subsystems (or explicitly for no + * subsystems) then they must match + */ + if ((opts->subsys_bits || opts->none) + && (opts->subsys_bits != root->subsys_bits)) return 0; return 1; } +static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) +{ + struct cgroupfs_root *root; + + if (!opts->subsys_bits && !opts->none) + return NULL; + + root = kzalloc(sizeof(*root), GFP_KERNEL); + if (!root) + return ERR_PTR(-ENOMEM); + + if (!init_root_id(root)) { + kfree(root); + return ERR_PTR(-ENOMEM); + } + init_cgroup_root(root); + + root->subsys_bits = opts->subsys_bits; + root->flags = opts->flags; + if (opts->release_agent) + strcpy(root->release_agent_path, opts->release_agent); + if (opts->name) + strcpy(root->name, opts->name); + return root; +} + +static void cgroup_drop_root(struct cgroupfs_root *root) +{ + if (!root) + return; + + BUG_ON(!root->hierarchy_id); + spin_lock(&hierarchy_id_lock); + ida_remove(&hierarchy_ida, root->hierarchy_id); + spin_unlock(&hierarchy_id_lock); + kfree(root); +} + static int cgroup_set_super(struct super_block *sb, void *data) { int ret; - struct cgroupfs_root *root = data; + struct cgroup_sb_opts *opts = data; + + /* If we don't have a new root, we can't set up a new sb */ + if (!opts->new_root) + return -EINVAL; + + BUG_ON(!opts->subsys_bits && !opts->none); ret = set_anon_super(sb, NULL); if (ret) return ret; - sb->s_fs_info = root; - root->sb = sb; + sb->s_fs_info = opts->new_root; + opts->new_root->sb = sb; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -1051,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type, void *data, struct vfsmount *mnt) { struct cgroup_sb_opts opts; + struct cgroupfs_root *root; int ret = 0; struct super_block *sb; - struct cgroupfs_root *root; - struct list_head tmp_cg_links; + struct cgroupfs_root *new_root; /* First find the desired set of subsystems */ ret = parse_cgroupfs_options(data, &opts); - if (ret) { - kfree(opts.release_agent); - return ret; - } - - root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) { - kfree(opts.release_agent); - return -ENOMEM; - } + if (ret) + goto out_err; - init_cgroup_root(root); - root->subsys_bits = opts.subsys_bits; - root->flags = opts.flags; - if (opts.release_agent) { - strcpy(root->release_agent_path, opts.release_agent); - kfree(opts.release_agent); + /* + * Allocate a new cgroup root. We may not need it if we're + * reusing an existing hierarchy. + */ + new_root = cgroup_root_from_opts(&opts); + if (IS_ERR(new_root)) { + ret = PTR_ERR(new_root); + goto out_err; } + opts.new_root = new_root; - sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root); - + /* Locate an existing or new sb for this hierarchy */ + sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); if (IS_ERR(sb)) { - kfree(root); - return PTR_ERR(sb); + ret = PTR_ERR(sb); + cgroup_drop_root(opts.new_root); + goto out_err; } - if (sb->s_fs_info != root) { - /* Reusing an existing superblock */ - BUG_ON(sb->s_root == NULL); - kfree(root); - root = NULL; - } else { - /* New superblock */ + root = sb->s_fs_info; + BUG_ON(!root); + if (root == opts.new_root) { + /* We used the new root structure, so this is a new hierarchy */ + struct list_head tmp_cg_links; struct cgroup *root_cgrp = &root->top_cgroup; struct inode *inode; + struct cgroupfs_root *existing_root; int i; BUG_ON(sb->s_root != NULL); @@ -1105,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, mutex_lock(&inode->i_mutex); mutex_lock(&cgroup_mutex); + if (strlen(root->name)) { + /* Check for name clashes with existing mounts */ + for_each_active_root(existing_root) { + if (!strcmp(existing_root->name, root->name)) { + ret = -EBUSY; + mutex_unlock(&cgroup_mutex); + mutex_unlock(&inode->i_mutex); + goto drop_new_super; + } + } + } + /* * We're accessing css_set_count without locking * css_set_lock here, but that's OK - it can only be @@ -1123,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type, if (ret == -EBUSY) { mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); - goto free_cg_links; + free_cg_links(&tmp_cg_links); + goto drop_new_super; } /* EBUSY should be the only error here */ @@ -1155,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type, BUG_ON(root->number_of_cgroups != 1); cgroup_populate_dir(root_cgrp); - mutex_unlock(&inode->i_mutex); mutex_unlock(&cgroup_mutex); + mutex_unlock(&inode->i_mutex); + } else { + /* + * We re-used an existing hierarchy - the new root (if + * any) is not needed + */ + cgroup_drop_root(opts.new_root); } simple_set_mnt(mnt, sb); + kfree(opts.release_agent); + kfree(opts.name); return 0; - free_cg_links: - free_cg_links(&tmp_cg_links); drop_new_super: deactivate_locked_super(sb); + out_err: + kfree(opts.release_agent); + kfree(opts.name); + return ret; } @@ -1211,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) { mutex_unlock(&cgroup_mutex); kill_litter_super(sb); - kfree(root); + cgroup_drop_root(root); } static struct file_system_type cgroup_fs_type = { @@ -1276,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) return 0; } -/* - * Return the first subsystem attached to a cgroup's hierarchy, and - * its subsystem id. - */ - -static void get_first_subsys(const struct cgroup *cgrp, - struct cgroup_subsys_state **css, int *subsys_id) -{ - const struct cgroupfs_root *root = cgrp->root; - const struct cgroup_subsys *test_ss; - BUG_ON(list_empty(&root->subsys_list)); - test_ss = list_entry(root->subsys_list.next, - struct cgroup_subsys, sibling); - if (css) { - *css = cgrp->subsys[test_ss->subsys_id]; - BUG_ON(!*css); - } - if (subsys_id) - *subsys_id = test_ss->subsys_id; -} - /** * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' * @cgrp: the cgroup the task is attaching to @@ -1313,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) struct css_set *cg; struct css_set *newcg; struct cgroupfs_root *root = cgrp->root; - int subsys_id; - - get_first_subsys(cgrp, NULL, &subsys_id); /* Nothing to do if the task is already in that cgroup */ - oldcgrp = task_cgroup(tsk, subsys_id); + oldcgrp = task_cgroup_from_root(tsk, root); if (cgrp == oldcgrp) return 0; for_each_subsys(root, ss) { if (ss->can_attach) { - retval = ss->can_attach(ss, cgrp, tsk); + retval = ss->can_attach(ss, cgrp, tsk, false); if (retval) return retval; } @@ -1362,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) for_each_subsys(root, ss) { if (ss->attach) - ss->attach(ss, cgrp, oldcgrp, tsk); + ss->attach(ss, cgrp, oldcgrp, tsk, false); } set_bit(CGRP_RELEASABLE, &oldcgrp->flags); synchronize_rcu(); @@ -1423,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) return ret; } -/* The various types of files and directories in a cgroup file system */ -enum cgroup_filetype { - FILE_ROOT, - FILE_DIR, - FILE_TASKLIST, - FILE_NOTIFY_ON_RELEASE, - FILE_RELEASE_AGENT, -}; - /** * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. * @cgrp: the cgroup to be checked for liveness @@ -1876,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp) * the start of a css_set */ static void cgroup_advance_iter(struct cgroup *cgrp, - struct cgroup_iter *it) + struct cgroup_iter *it) { struct list_head *l = it->cg_link; struct cg_cgroup_link *link; @@ -2129,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) } /* - * Stuff for reading the 'tasks' file. + * Stuff for reading the 'tasks'/'procs' files. * * Reading this file can return large amounts of data if a cgroup has * *lots* of attached tasks. So it may need several calls to read(), @@ -2139,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) */ /* - * Load into 'pidarray' up to 'npids' of the tasks using cgroup - * 'cgrp'. Return actual number of pids loaded. No need to - * task_lock(p) when reading out p->cgroup, since we're in an RCU - * read section, so the css_set can't go away, and is - * immutable after creation. + * The following two functions "fix" the issue where there are more pids + * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. + * TODO: replace with a kernel-wide solution to this problem + */ +#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2)) +static void *pidlist_allocate(int count) +{ + if (PIDLIST_TOO_LARGE(count)) + return vmalloc(count * sizeof(pid_t)); + else + return kmalloc(count * sizeof(pid_t), GFP_KERNEL); +} +static void pidlist_free(void *p) +{ + if (is_vmalloc_addr(p)) + vfree(p); + else + kfree(p); +} +static void *pidlist_resize(void *p, int newcount) +{ + void *newlist; + /* note: if new alloc fails, old p will still be valid either way */ + if (is_vmalloc_addr(p)) { + newlist = vmalloc(newcount * sizeof(pid_t)); + if (!newlist) + return NULL; + memcpy(newlist, p, newcount * sizeof(pid_t)); + vfree(p); + } else { + newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL); + } + return newlist; +} + +/* + * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries + * If the new stripped list is sufficiently smaller and there's enough memory + * to allocate a new buffer, will let go of the unneeded memory. Returns the + * number of unique elements. + */ +/* is the size difference enough that we should re-allocate the array? */ +#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new)) +static int pidlist_uniq(pid_t **p, int length) +{ + int src, dest = 1; + pid_t *list = *p; + pid_t *newlist; + + /* + * we presume the 0th element is unique, so i starts at 1. trivial + * edge cases first; no work needs to be done for either + */ + if (length == 0 || length == 1) + return length; + /* src and dest walk down the list; dest counts unique elements */ + for (src = 1; src < length; src++) { + /* find next unique element */ + while (list[src] == list[src-1]) { + src++; + if (src == length) + goto after; + } + /* dest always points to where the next unique element goes */ + list[dest] = list[src]; + dest++; + } +after: + /* + * if the length difference is large enough, we want to allocate a + * smaller buffer to save memory. if this fails due to out of memory, + * we'll just stay with what we've got. + */ + if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { + newlist = pidlist_resize(list, dest); + if (newlist) + *p = newlist; + } + return dest; +} + +static int cmppid(const void *a, const void *b) +{ + return *(pid_t *)a - *(pid_t *)b; +} + +/* + * find the appropriate pidlist for our purpose (given procs vs tasks) + * returns with the lock on that pidlist already held, and takes care + * of the use count, or returns NULL with no locks held if we're out of + * memory. */ -static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) +static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, + enum cgroup_filetype type) { - int n = 0, pid; + struct cgroup_pidlist *l; + /* don't need task_nsproxy() if we're looking at ourself */ + struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); + /* + * We can't drop the pidlist_mutex before taking the l->mutex in case + * the last ref-holder is trying to remove l from the list at the same + * time. Holding the pidlist_mutex precludes somebody taking whichever + * list we find out from under us - compare release_pid_array(). + */ + mutex_lock(&cgrp->pidlist_mutex); + list_for_each_entry(l, &cgrp->pidlists, links) { + if (l->key.type == type && l->key.ns == ns) { + /* found a matching list - drop the extra refcount */ + put_pid_ns(ns); + /* make sure l doesn't vanish out from under us */ + down_write(&l->mutex); + mutex_unlock(&cgrp->pidlist_mutex); + l->use_count++; + return l; + } + } + /* entry not found; create a new one */ + l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); + if (!l) { + mutex_unlock(&cgrp->pidlist_mutex); + put_pid_ns(ns); + return l; + } + init_rwsem(&l->mutex); + down_write(&l->mutex); + l->key.type = type; + l->key.ns = ns; + l->use_count = 0; /* don't increment here */ + l->list = NULL; + l->owner = cgrp; + list_add(&l->links, &cgrp->pidlists); + mutex_unlock(&cgrp->pidlist_mutex); + return l; +} + +/* + * Load a cgroup's pidarray with either procs' tgids or tasks' pids + */ +static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, + struct cgroup_pidlist **lp) +{ + pid_t *array; + int length; + int pid, n = 0; /* used for populating the array */ struct cgroup_iter it; struct task_struct *tsk; + struct cgroup_pidlist *l; + + /* + * If cgroup gets more users after we read count, we won't have + * enough space - tough. This race is indistinguishable to the + * caller from the case that the additional cgroup users didn't + * show up until sometime later on. + */ + length = cgroup_task_count(cgrp); + array = pidlist_allocate(length); + if (!array) + return -ENOMEM; + /* now, populate the array */ cgroup_iter_start(cgrp, &it); while ((tsk = cgroup_iter_next(cgrp, &it))) { - if (unlikely(n == npids)) + if (unlikely(n == length)) break; - pid = task_pid_vnr(tsk); - if (pid > 0) - pidarray[n++] = pid; + /* get tgid or pid for procs or tasks file respectively */ + if (type == CGROUP_FILE_PROCS) + pid = task_tgid_vnr(tsk); + else + pid = task_pid_vnr(tsk); + if (pid > 0) /* make sure to only use valid results */ + array[n++] = pid; } cgroup_iter_end(cgrp, &it); - return n; + length = n; + /* now sort & (if procs) strip out duplicates */ + sort(array, length, sizeof(pid_t), cmppid, NULL); + if (type == CGROUP_FILE_PROCS) + length = pidlist_uniq(&array, length); + l = cgroup_pidlist_find(cgrp, type); + if (!l) { + pidlist_free(array); + return -ENOMEM; + } + /* store array, freeing old if necessary - lock already held */ + pidlist_free(l->list); + l->list = array; + l->length = length; + l->use_count++; + up_write(&l->mutex); + *lp = l; + return 0; } /** @@ -2216,37 +2604,14 @@ err: return ret; } -/* - * Cache pids for all threads in the same pid namespace that are - * opening the same "tasks" file. - */ -struct cgroup_pids { - /* The node in cgrp->pids_list */ - struct list_head list; - /* The cgroup those pids belong to */ - struct cgroup *cgrp; - /* The namepsace those pids belong to */ - struct pid_namespace *ns; - /* Array of process ids in the cgroup */ - pid_t *tasks_pids; - /* How many files are using the this tasks_pids array */ - int use_count; - /* Length of the current tasks_pids array */ - int length; -}; - -static int cmppid(const void *a, const void *b) -{ - return *(pid_t *)a - *(pid_t *)b; -} /* - * seq_file methods for the "tasks" file. The seq_file position is the + * seq_file methods for the tasks/procs files. The seq_file position is the * next pid to display; the seq_file iterator is a pointer to the pid - * in the cgroup->tasks_pids array. + * in the cgroup->l->list array. */ -static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) +static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) { /* * Initially we receive a position value that corresponds to @@ -2254,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) * after a seek to the start). Use a binary-search to find the * next pid to display, if any */ - struct cgroup_pids *cp = s->private; - struct cgroup *cgrp = cp->cgrp; + struct cgroup_pidlist *l = s->private; int index = 0, pid = *pos; int *iter; - down_read(&cgrp->pids_mutex); + down_read(&l->mutex); if (pid) { - int end = cp->length; + int end = l->length; while (index < end) { int mid = (index + end) / 2; - if (cp->tasks_pids[mid] == pid) { + if (l->list[mid] == pid) { index = mid; break; - } else if (cp->tasks_pids[mid] <= pid) + } else if (l->list[mid] <= pid) index = mid + 1; else end = mid; } } /* If we're off the end of the array, we're done */ - if (index >= cp->length) + if (index >= l->length) return NULL; /* Update the abstract position to be the actual pid that we found */ - iter = cp->tasks_pids + index; + iter = l->list + index; *pos = *iter; return iter; } -static void cgroup_tasks_stop(struct seq_file *s, void *v) +static void cgroup_pidlist_stop(struct seq_file *s, void *v) { - struct cgroup_pids *cp = s->private; - struct cgroup *cgrp = cp->cgrp; - up_read(&cgrp->pids_mutex); + struct cgroup_pidlist *l = s->private; + up_read(&l->mutex); } -static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) +static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) { - struct cgroup_pids *cp = s->private; - int *p = v; - int *end = cp->tasks_pids + cp->length; - + struct cgroup_pidlist *l = s->private; + pid_t *p = v; + pid_t *end = l->list + l->length; /* * Advance to the next pid in the array. If this goes off the * end, we're done @@ -2309,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) } } -static int cgroup_tasks_show(struct seq_file *s, void *v) +static int cgroup_pidlist_show(struct seq_file *s, void *v) { return seq_printf(s, "%d\n", *(int *)v); } -static const struct seq_operations cgroup_tasks_seq_operations = { - .start = cgroup_tasks_start, - .stop = cgroup_tasks_stop, - .next = cgroup_tasks_next, - .show = cgroup_tasks_show, +/* + * seq_operations functions for iterating on pidlists through seq_file - + * independent of whether it's tasks or procs + */ +static const struct seq_operations cgroup_pidlist_seq_operations = { + .start = cgroup_pidlist_start, + .stop = cgroup_pidlist_stop, + .next = cgroup_pidlist_next, + .show = cgroup_pidlist_show, }; -static void release_cgroup_pid_array(struct cgroup_pids *cp) +static void cgroup_release_pid_array(struct cgroup_pidlist *l) { - struct cgroup *cgrp = cp->cgrp; - - down_write(&cgrp->pids_mutex); - BUG_ON(!cp->use_count); - if (!--cp->use_count) { - list_del(&cp->list); - put_pid_ns(cp->ns); - kfree(cp->tasks_pids); - kfree(cp); + /* + * the case where we're the last user of this particular pidlist will + * have us remove it from the cgroup's list, which entails taking the + * mutex. since in pidlist_find the pidlist->lock depends on cgroup-> + * pidlist_mutex, we have to take pidlist_mutex first. + */ + mutex_lock(&l->owner->pidlist_mutex); + down_write(&l->mutex); + BUG_ON(!l->use_count); + if (!--l->use_count) { + /* we're the last user if refcount is 0; remove and free */ + list_del(&l->links); + mutex_unlock(&l->owner->pidlist_mutex); + pidlist_free(l->list); + put_pid_ns(l->key.ns); + up_write(&l->mutex); + kfree(l); + return; } - up_write(&cgrp->pids_mutex); + mutex_unlock(&l->owner->pidlist_mutex); + up_write(&l->mutex); } -static int cgroup_tasks_release(struct inode *inode, struct file *file) +static int cgroup_pidlist_release(struct inode *inode, struct file *file) { - struct seq_file *seq; - struct cgroup_pids *cp; - + struct cgroup_pidlist *l; if (!(file->f_mode & FMODE_READ)) return 0; - - seq = file->private_data; - cp = seq->private; - - release_cgroup_pid_array(cp); + /* + * the seq_file will only be initialized if the file was opened for + * reading; hence we check if it's not null only in that case. + */ + l = ((struct seq_file *)file->private_data)->private; + cgroup_release_pid_array(l); return seq_release(inode, file); } -static struct file_operations cgroup_tasks_operations = { +static const struct file_operations cgroup_pidlist_operations = { .read = seq_read, .llseek = seq_lseek, .write = cgroup_file_write, - .release = cgroup_tasks_release, + .release = cgroup_pidlist_release, }; /* - * Handle an open on 'tasks' file. Prepare an array containing the - * process id's of tasks currently attached to the cgroup being opened. + * The following functions handle opens on a file that displays a pidlist + * (tasks or procs). Prepare an array of the process/thread IDs of whoever's + * in the cgroup. */ - -static int cgroup_tasks_open(struct inode *unused, struct file *file) +/* helper function for the two below it */ +static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type) { struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - struct pid_namespace *ns = current->nsproxy->pid_ns; - struct cgroup_pids *cp; - pid_t *pidarray; - int npids; + struct cgroup_pidlist *l; int retval; /* Nothing to do for write-only files */ if (!(file->f_mode & FMODE_READ)) return 0; - /* - * If cgroup gets more users after we read count, we won't have - * enough space - tough. This race is indistinguishable to the - * caller from the case that the additional cgroup users didn't - * show up until sometime later on. - */ - npids = cgroup_task_count(cgrp); - pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); - if (!pidarray) - return -ENOMEM; - npids = pid_array_load(pidarray, npids, cgrp); - sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); - - /* - * Store the array in the cgroup, freeing the old - * array if necessary - */ - down_write(&cgrp->pids_mutex); - - list_for_each_entry(cp, &cgrp->pids_list, list) { - if (ns == cp->ns) - goto found; - } - - cp = kzalloc(sizeof(*cp), GFP_KERNEL); - if (!cp) { - up_write(&cgrp->pids_mutex); - kfree(pidarray); - return -ENOMEM; - } - cp->cgrp = cgrp; - cp->ns = ns; - get_pid_ns(ns); - list_add(&cp->list, &cgrp->pids_list); -found: - kfree(cp->tasks_pids); - cp->tasks_pids = pidarray; - cp->length = npids; - cp->use_count++; - up_write(&cgrp->pids_mutex); - - file->f_op = &cgroup_tasks_operations; + /* have the array populated */ + retval = pidlist_array_load(cgrp, type, &l); + if (retval) + return retval; + /* configure file information */ + file->f_op = &cgroup_pidlist_operations; - retval = seq_open(file, &cgroup_tasks_seq_operations); + retval = seq_open(file, &cgroup_pidlist_seq_operations); if (retval) { - release_cgroup_pid_array(cp); + cgroup_release_pid_array(l); return retval; } - ((struct seq_file *)file->private_data)->private = cp; + ((struct seq_file *)file->private_data)->private = l; return 0; } +static int cgroup_tasks_open(struct inode *unused, struct file *file) +{ + return cgroup_pidlist_open(file, CGROUP_FILE_TASKS); +} +static int cgroup_procs_open(struct inode *unused, struct file *file) +{ + return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); +} static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, struct cftype *cft) @@ -2449,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp, /* * for the common functions, 'private' gives the type of file */ +/* for hysterical raisins, we can't put this on the older files */ +#define CGROUP_FILE_GENERIC_PREFIX "cgroup." static struct cftype files[] = { { .name = "tasks", .open = cgroup_tasks_open, .write_u64 = cgroup_tasks_write, - .release = cgroup_tasks_release, - .private = FILE_TASKLIST, + .release = cgroup_pidlist_release, .mode = S_IRUGO | S_IWUSR, }, - + { + .name = CGROUP_FILE_GENERIC_PREFIX "procs", + .open = cgroup_procs_open, + /* .write_u64 = cgroup_procs_write, TODO */ + .release = cgroup_pidlist_release, + .mode = S_IRUGO, + }, { .name = "notify_on_release", .read_u64 = cgroup_read_notify_on_release, .write_u64 = cgroup_write_notify_on_release, - .private = FILE_NOTIFY_ON_RELEASE, }, }; @@ -2472,7 +2823,6 @@ static struct cftype cft_release_agent = { .read_seq_string = cgroup_release_agent_show, .write_string = cgroup_release_agent_write, .max_write_len = PATH_MAX, - .private = FILE_RELEASE_AGENT, }; static int cgroup_populate_dir(struct cgroup *cgrp) @@ -2879,6 +3229,7 @@ int __init cgroup_init_early(void) init_task.cgroups = &init_css_set; init_css_set_link.cg = &init_css_set; + init_css_set_link.cgrp = dummytop; list_add(&init_css_set_link.cgrp_link_list, &rootnode.top_cgroup.css_sets); list_add(&init_css_set_link.cg_link_list, @@ -2933,7 +3284,7 @@ int __init cgroup_init(void) /* Add init_css_set to the hash table */ hhead = css_set_hash(init_css_set.subsys); hlist_add_head(&init_css_set.hlist, hhead); - + BUG_ON(!init_root_id(&rootnode)); err = register_filesystem(&cgroup_fs_type); if (err < 0) goto out; @@ -2986,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v) for_each_active_root(root) { struct cgroup_subsys *ss; struct cgroup *cgrp; - int subsys_id; int count = 0; - seq_printf(m, "%lu:", root->subsys_bits); + seq_printf(m, "%d:", root->hierarchy_id); for_each_subsys(root, ss) seq_printf(m, "%s%s", count++ ? "," : "", ss->name); + if (strlen(root->name)) + seq_printf(m, "%sname=%s", count ? "," : "", + root->name); seq_putc(m, ':'); - get_first_subsys(&root->top_cgroup, NULL, &subsys_id); - cgrp = task_cgroup(tsk, subsys_id); + cgrp = task_cgroup_from_root(tsk, root); retval = cgroup_path(cgrp, buf, PAGE_SIZE); if (retval < 0) goto out_unlock; @@ -3033,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) mutex_lock(&cgroup_mutex); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; - seq_printf(m, "%s\t%lu\t%d\t%d\n", - ss->name, ss->root->subsys_bits, + seq_printf(m, "%s\t%d\t%d\t%d\n", + ss->name, ss->root->hierarchy_id, ss->root->number_of_cgroups, !ss->disabled); } mutex_unlock(&cgroup_mutex); @@ -3320,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task) { int ret; struct cgroup *target; - int subsys_id; if (cgrp == dummytop) return 1; - get_first_subsys(cgrp, NULL, &subsys_id); - target = task_cgroup(task, subsys_id); + target = task_cgroup_from_root(task, cgrp->root); while (cgrp != target && cgrp!= cgrp->top_cgroup) cgrp = cgrp->parent; ret = (cgrp == target); @@ -3693,3 +4043,154 @@ css_get_next(struct cgroup_subsys *ss, int id, return ret; } +#ifdef CONFIG_CGROUP_DEBUG +static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, + struct cgroup *cont) +{ + struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); + + if (!css) + return ERR_PTR(-ENOMEM); + + return css; +} + +static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) +{ + kfree(cont->subsys[debug_subsys_id]); +} + +static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) +{ + return atomic_read(&cont->count); +} + +static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft) +{ + return cgroup_task_count(cont); +} + +static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) +{ + return (u64)(unsigned long)current->cgroups; +} + +static u64 current_css_set_refcount_read(struct cgroup *cont, + struct cftype *cft) +{ + u64 count; + + rcu_read_lock(); + count = atomic_read(¤t->cgroups->refcount); + rcu_read_unlock(); + return count; +} + +static int current_css_set_cg_links_read(struct cgroup *cont, + struct cftype *cft, + struct seq_file *seq) +{ + struct cg_cgroup_link *link; + struct css_set *cg; + + read_lock(&css_set_lock); + rcu_read_lock(); + cg = rcu_dereference(current->cgroups); + list_for_each_entry(link, &cg->cg_links, cg_link_list) { + struct cgroup *c = link->cgrp; + const char *name; + + if (c->dentry) + name = c->dentry->d_name.name; + else + name = "?"; + seq_printf(seq, "Root %d group %s\n", + c->root->hierarchy_id, name); + } + rcu_read_unlock(); + read_unlock(&css_set_lock); + return 0; +} + +#define MAX_TASKS_SHOWN_PER_CSS 25 +static int cgroup_css_links_read(struct cgroup *cont, + struct cftype *cft, + struct seq_file *seq) +{ + struct cg_cgroup_link *link; + + read_lock(&css_set_lock); + list_for_each_entry(link, &cont->css_sets, cgrp_link_list) { + struct css_set *cg = link->cg; + struct task_struct *task; + int count = 0; + seq_printf(seq, "css_set %p\n", cg); + list_for_each_entry(task, &cg->tasks, cg_list) { + if (count++ > MAX_TASKS_SHOWN_PER_CSS) { + seq_puts(seq, " ...\n"); + break; + } else { + seq_printf(seq, " task %d\n", + task_pid_vnr(task)); + } + } + } + read_unlock(&css_set_lock); + return 0; +} + +static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) +{ + return test_bit(CGRP_RELEASABLE, &cgrp->flags); +} + +static struct cftype debug_files[] = { + { + .name = "cgroup_refcount", + .read_u64 = cgroup_refcount_read, + }, + { + .name = "taskcount", + .read_u64 = debug_taskcount_read, + }, + + { + .name = "current_css_set", + .read_u64 = current_css_set_read, + }, + + { + .name = "current_css_set_refcount", + .read_u64 = current_css_set_refcount_read, + }, + + { + .name = "current_css_set_cg_links", + .read_seq_string = current_css_set_cg_links_read, + }, + + { + .name = "cgroup_css_links", + .read_seq_string = cgroup_css_links_read, + }, + + { + .name = "releasable", + .read_u64 = releasable_read, + }, +}; + +static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) +{ + return cgroup_add_files(cont, ss, debug_files, + ARRAY_SIZE(debug_files)); +} + +struct cgroup_subsys debug_subsys = { + .name = "debug", + .create = debug_create, + .destroy = debug_destroy, + .populate = debug_populate, + .subsys_id = debug_subsys_id, +}; +#endif /* CONFIG_CGROUP_DEBUG */ diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c deleted file mode 100644 index 0c92d797baa..00000000000 --- a/kernel/cgroup_debug.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * kernel/cgroup_debug.c - Example cgroup subsystem that - * exposes debug info - * - * Copyright (C) Google Inc, 2007 - * - * Developed by Paul Menage (menage@google.com) - * - */ - -#include <linux/cgroup.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/rcupdate.h> - -#include <asm/atomic.h> - -static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, - struct cgroup *cont) -{ - struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); - - if (!css) - return ERR_PTR(-ENOMEM); - - return css; -} - -static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) -{ - kfree(cont->subsys[debug_subsys_id]); -} - -static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) -{ - return atomic_read(&cont->count); -} - -static u64 taskcount_read(struct cgroup *cont, struct cftype *cft) -{ - u64 count; - - count = cgroup_task_count(cont); - return count; -} - -static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) -{ - return (u64)(long)current->cgroups; -} - -static u64 current_css_set_refcount_read(struct cgroup *cont, - struct cftype *cft) -{ - u64 count; - - rcu_read_lock(); - count = atomic_read(¤t->cgroups->refcount); - rcu_read_unlock(); - return count; -} - -static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) -{ - return test_bit(CGRP_RELEASABLE, &cgrp->flags); -} - -static struct cftype files[] = { - { - .name = "cgroup_refcount", - .read_u64 = cgroup_refcount_read, - }, - { - .name = "taskcount", - .read_u64 = taskcount_read, - }, - - { - .name = "current_css_set", - .read_u64 = current_css_set_read, - }, - - { - .name = "current_css_set_refcount", - .read_u64 = current_css_set_refcount_read, - }, - - { - .name = "releasable", - .read_u64 = releasable_read, - }, -}; - -static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) -{ - return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); -} - -struct cgroup_subsys debug_subsys = { - .name = "debug", - .create = debug_create, - .destroy = debug_destroy, - .populate = debug_populate, - .subsys_id = debug_subsys_id, -}; diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index fb249e2bcad..59e9ef6aab4 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task) */ static int freezer_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, - struct task_struct *task) + struct task_struct *task, bool threadgroup) { struct freezer *freezer; @@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss, if (freezer->state == CGROUP_FROZEN) return -EBUSY; + if (threadgroup) { + struct task_struct *c; + + rcu_read_lock(); + list_for_each_entry_rcu(c, &task->thread_group, thread_group) { + if (is_task_frozen_enough(c)) { + rcu_read_unlock(); + return -EBUSY; + } + } + rcu_read_unlock(); + } + return 0; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7e75a41bd50..b5cb469d254 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp) static cpumask_var_t cpus_attach; /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ -static int cpuset_can_attach(struct cgroup_subsys *ss, - struct cgroup *cont, struct task_struct *tsk) +static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, + struct task_struct *tsk, bool threadgroup) { + int ret; struct cpuset *cs = cgroup_cs(cont); if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) @@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, if (tsk->flags & PF_THREAD_BOUND) return -EINVAL; - return security_task_setscheduler(tsk, 0, NULL); + ret = security_task_setscheduler(tsk, 0, NULL); + if (ret) + return ret; + if (threadgroup) { + struct task_struct *c; + + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + ret = security_task_setscheduler(c, 0, NULL); + if (ret) { + rcu_read_unlock(); + return ret; + } + } + rcu_read_unlock(); + } + return 0; +} + +static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, + struct cpuset *cs) +{ + int err; + /* + * can_attach beforehand should guarantee that this doesn't fail. + * TODO: have a better way to handle failure here + */ + err = set_cpus_allowed_ptr(tsk, cpus_attach); + WARN_ON_ONCE(err); + + task_lock(tsk); + cpuset_change_task_nodemask(tsk, to); + task_unlock(tsk); + cpuset_update_task_spread_flag(cs, tsk); + } -static void cpuset_attach(struct cgroup_subsys *ss, - struct cgroup *cont, struct cgroup *oldcont, - struct task_struct *tsk) +static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, + struct cgroup *oldcont, struct task_struct *tsk, + bool threadgroup) { nodemask_t from, to; struct mm_struct *mm; struct cpuset *cs = cgroup_cs(cont); struct cpuset *oldcs = cgroup_cs(oldcont); - int err; if (cs == &top_cpuset) { cpumask_copy(cpus_attach, cpu_possible_mask); @@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss, guarantee_online_cpus(cs, cpus_attach); guarantee_online_mems(cs, &to); } - err = set_cpus_allowed_ptr(tsk, cpus_attach); - if (err) - return; - task_lock(tsk); - cpuset_change_task_nodemask(tsk, &to); - task_unlock(tsk); - cpuset_update_task_spread_flag(cs, tsk); + /* do per-task migration stuff possibly for each in the threadgroup */ + cpuset_attach_task(tsk, &to, cs); + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + cpuset_attach_task(c, &to, cs); + } + rcu_read_unlock(); + } + /* change mm; only needs to be done once even if threadgroup */ from = oldcs->mems_allowed; to = cs->mems_allowed; mm = get_task_mm(tsk); diff --git a/kernel/cred.c b/kernel/cred.c index d7f7a01082e..dd76cfe5f5b 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -782,6 +782,25 @@ EXPORT_SYMBOL(set_create_files_as); #ifdef CONFIG_DEBUG_CREDENTIALS +bool creds_are_invalid(const struct cred *cred) +{ + if (cred->magic != CRED_MAGIC) + return true; + if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers)) + return true; +#ifdef CONFIG_SECURITY_SELINUX + if (selinux_is_enabled()) { + if ((unsigned long) cred->security < PAGE_SIZE) + return true; + if ((*(u32 *)cred->security & 0xffffff00) == + (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)) + return true; + } +#endif + return false; +} +EXPORT_SYMBOL(creds_are_invalid); + /* * dump invalid credentials */ diff --git a/kernel/exit.c b/kernel/exit.c index 60d6fdcc926..5859f598c95 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -976,8 +976,6 @@ NORET_TYPE void do_exit(long code) disassociate_ctty(1); module_put(task_thread_info(tsk)->exec_domain->module); - if (tsk->binfmt) - module_put(tsk->binfmt->module); proc_exit_connector(tsk); @@ -1097,28 +1095,28 @@ struct wait_opts { int __user *wo_stat; struct rusage __user *wo_rusage; + wait_queue_t child_wait; int notask_error; }; -static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) +static inline +struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { - struct pid *pid = NULL; - if (type == PIDTYPE_PID) - pid = task->pids[type].pid; - else if (type < PIDTYPE_MAX) - pid = task->group_leader->pids[type].pid; - return pid; + if (type != PIDTYPE_PID) + task = task->group_leader; + return task->pids[type].pid; } -static int eligible_child(struct wait_opts *wo, struct task_struct *p) +static int eligible_pid(struct wait_opts *wo, struct task_struct *p) { - int err; - - if (wo->wo_type < PIDTYPE_MAX) { - if (task_pid_type(p, wo->wo_type) != wo->wo_pid) - return 0; - } + return wo->wo_type == PIDTYPE_MAX || + task_pid_type(p, wo->wo_type) == wo->wo_pid; +} +static int eligible_child(struct wait_opts *wo, struct task_struct *p) +{ + if (!eligible_pid(wo, p)) + return 0; /* Wait for all children (clone and not) if __WALL is set; * otherwise, wait for clone children *only* if __WCLONE is * set; otherwise, wait for non-clone children *only*. (Note: @@ -1128,10 +1126,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p) && !(wo->wo_flags & __WALL)) return 0; - err = security_task_wait(p); - if (err) - return err; - return 1; } @@ -1144,18 +1138,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, put_task_struct(p); infop = wo->wo_info; - if (!retval) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user((short)why, &infop->si_code); - if (!retval) - retval = put_user(pid, &infop->si_pid); - if (!retval) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = put_user(status, &infop->si_status); + if (infop) { + if (!retval) + retval = put_user(SIGCHLD, &infop->si_signo); + if (!retval) + retval = put_user(0, &infop->si_errno); + if (!retval) + retval = put_user((short)why, &infop->si_code); + if (!retval) + retval = put_user(pid, &infop->si_pid); + if (!retval) + retval = put_user(uid, &infop->si_uid); + if (!retval) + retval = put_user(status, &infop->si_status); + } if (!retval) retval = pid; return retval; @@ -1485,13 +1481,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) * then ->notask_error is 0 if @p is an eligible child, * or another error from security_task_wait(), or still -ECHILD. */ -static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, - int ptrace, struct task_struct *p) +static int wait_consider_task(struct wait_opts *wo, int ptrace, + struct task_struct *p) { int ret = eligible_child(wo, p); if (!ret) return ret; + ret = security_task_wait(p); if (unlikely(ret < 0)) { /* * If we have not yet seen any eligible child, @@ -1553,7 +1550,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) * Do not consider detached threads. */ if (!task_detached(p)) { - int ret = wait_consider_task(wo, tsk, 0, p); + int ret = wait_consider_task(wo, 0, p); if (ret) return ret; } @@ -1567,7 +1564,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) struct task_struct *p; list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { - int ret = wait_consider_task(wo, tsk, 1, p); + int ret = wait_consider_task(wo, 1, p); if (ret) return ret; } @@ -1575,15 +1572,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) return 0; } +static int child_wait_callback(wait_queue_t *wait, unsigned mode, + int sync, void *key) +{ + struct wait_opts *wo = container_of(wait, struct wait_opts, + child_wait); + struct task_struct *p = key; + + if (!eligible_pid(wo, p)) + return 0; + + if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) + return 0; + + return default_wake_function(wait, mode, sync, key); +} + +void __wake_up_parent(struct task_struct *p, struct task_struct *parent) +{ + __wake_up_sync_key(&parent->signal->wait_chldexit, + TASK_INTERRUPTIBLE, 1, p); +} + static long do_wait(struct wait_opts *wo) { - DECLARE_WAITQUEUE(wait, current); struct task_struct *tsk; int retval; trace_sched_process_wait(wo->wo_pid); - add_wait_queue(¤t->signal->wait_chldexit,&wait); + init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); + wo->child_wait.private = current; + add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); repeat: /* * If there is nothing that can match our critiera just get out. @@ -1624,32 +1644,7 @@ notask: } end: __set_current_state(TASK_RUNNING); - remove_wait_queue(¤t->signal->wait_chldexit,&wait); - if (wo->wo_info) { - struct siginfo __user *infop = wo->wo_info; - - if (retval > 0) - retval = 0; - else { - /* - * For a WNOHANG return, clear out all the fields - * we would set so the user can easily tell the - * difference. - */ - if (!retval) - retval = put_user(0, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user(0, &infop->si_code); - if (!retval) - retval = put_user(0, &infop->si_pid); - if (!retval) - retval = put_user(0, &infop->si_uid); - if (!retval) - retval = put_user(0, &infop->si_status); - } - } + remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait); return retval; } @@ -1694,6 +1689,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, wo.wo_stat = NULL; wo.wo_rusage = ru; ret = do_wait(&wo); + + if (ret > 0) { + ret = 0; + } else if (infop) { + /* + * For a WNOHANG return, clear out all the fields + * we would set so the user can easily tell the + * difference. + */ + if (!ret) + ret = put_user(0, &infop->si_signo); + if (!ret) + ret = put_user(0, &infop->si_errno); + if (!ret) + ret = put_user(0, &infop->si_code); + if (!ret) + ret = put_user(0, &infop->si_pid); + if (!ret) + ret = put_user(0, &infop->si_uid); + if (!ret) + ret = put_user(0, &infop->si_status); + } + put_pid(pid); /* avoid REGPARM breakage on x86: */ diff --git a/kernel/fork.c b/kernel/fork.c index 51ad0b0b726..266c6af6ef1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -434,6 +434,14 @@ __setup("coredump_filter=", coredump_filter_setup); #include <linux/init_task.h> +static void mm_init_aio(struct mm_struct *mm) +{ +#ifdef CONFIG_AIO + spin_lock_init(&mm->ioctx_lock); + INIT_HLIST_HEAD(&mm->ioctx_list); +#endif +} + static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -447,10 +455,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) set_mm_counter(mm, file_rss, 0); set_mm_counter(mm, anon_rss, 0); spin_lock_init(&mm->page_table_lock); - spin_lock_init(&mm->ioctx_lock); - INIT_HLIST_HEAD(&mm->ioctx_list); mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; + mm_init_aio(mm); mm_init_owner(mm, p); if (likely(!mm_alloc_pgd(mm))) { @@ -511,6 +518,8 @@ void mmput(struct mm_struct *mm) spin_unlock(&mmlist_lock); } put_swap_token(mm); + if (mm->binfmt) + module_put(mm->binfmt->module); mmdrop(mm); } } @@ -636,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk) mm->hiwater_rss = get_mm_rss(mm); mm->hiwater_vm = mm->total_vm; + if (mm->binfmt && !try_module_get(mm->binfmt->module)) + goto free_pt; + return mm; free_pt: + /* don't put binfmt in mmput, we haven't got module yet */ + mm->binfmt = NULL; mmput(mm); fail_nomem: @@ -979,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); + /* + * Siblings of global init remain as zombies on exit since they are + * not reaped by their parent (swapper). To solve this and to avoid + * multi-rooted process trees, prevent global and container-inits + * from creating siblings. + */ + if ((clone_flags & CLONE_PARENT) && + current->signal->flags & SIGNAL_UNKILLABLE) + return ERR_PTR(-EINVAL); + retval = security_task_create(clone_flags); if (retval) goto fork_out; @@ -1020,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; - if (p->binfmt && !try_module_get(p->binfmt->module)) - goto bad_fork_cleanup_put_domain; - p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); @@ -1310,9 +1331,6 @@ bad_fork_cleanup_cgroup: #endif cgroup_exit(p, cgroup_callbacks_done); delayacct_tsk_free(p); - if (p->binfmt) - module_put(p->binfmt->module); -bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 654efd09f6a..70a298d6da7 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -34,7 +34,7 @@ config GCOV_KERNEL config GCOV_PROFILE_ALL bool "Profile entire Kernel" depends on GCOV_KERNEL - depends on S390 || X86 || (PPC && EXPERIMENTAL) + depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE default n ---help--- This options activates profiling for the entire kernel. diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 022a4927b78..d4e84174740 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout) * Process updating of timeout sysctl */ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) goto out; diff --git a/kernel/kmod.c b/kernel/kmod.c index 689d20f3930..9fcb53a11f8 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -143,7 +143,6 @@ struct subprocess_info { static int ____call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; - enum umh_wait wait = sub_info->wait; int retval; BUG_ON(atomic_read(&sub_info->cred->usage) != 1); @@ -185,14 +184,10 @@ static int ____call_usermodehelper(void *data) */ set_user_nice(current, 0); - if (wait == UMH_WAIT_EXEC) - complete(sub_info->complete); - retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); /* Exec failed? */ - if (wait != UMH_WAIT_EXEC) - sub_info->retval = retval; + sub_info->retval = retval; do_exit(0); } @@ -271,14 +266,16 @@ static void __call_usermodehelper(struct work_struct *work) switch (wait) { case UMH_NO_WAIT: - case UMH_WAIT_EXEC: break; case UMH_WAIT_PROC: if (pid > 0) break; sub_info->retval = pid; - break; + /* FALLTHROUGH */ + + case UMH_WAIT_EXEC: + complete(sub_info->complete); } } diff --git a/kernel/module.c b/kernel/module.c index e6bc4b28aa6..5a29397ca4b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, } } +static void free_modinfo(struct module *mod) +{ + struct module_attribute *attr; + int i; + + for (i = 0; (attr = modinfo_attrs[i]); i++) { + if (attr->free) + attr->free(mod); + } +} + #ifdef CONFIG_KALLSYMS /* lookup symbol in given range of kernel_symbols */ @@ -1862,13 +1873,93 @@ static char elf_type(const Elf_Sym *sym, return '?'; } +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) +{ + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; + + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +static unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + unsigned int strindex, + const Elf_Ehdr *hdr, + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) +{ + unsigned long symoffs; + Elf_Shdr *symsect = sechdrs + symindex; + Elf_Shdr *strsect = sechdrs + strindex; + const Elf_Sym *src; + const char *strtab; + unsigned int i, nsrc, ndst; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + symsect->sh_name); + + src = (void *)hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + strtab = (void *)hdr + strsect->sh_offset; + for (ndst = i = 1; i < nsrc; ++i, ++src) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + unsigned int j = src->st_name; + + while(!__test_and_set_bit(j, strmap) && strtab[j]) + ++j; + ++ndst; + } + + /* Append room for core symbols at end of core part. */ + symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + strsect->sh_name); + + /* Append room for core symbols' strings at end of core part. */ + *pstroffs = mod->core_size; + __set_bit(0, strmap); + mod->core_size += bitmap_weight(strmap, strsect->sh_size); + + return symoffs; +} + static void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, - const char *secstrings) + unsigned long symoffs, + unsigned long stroffs, + const char *secstrings, + unsigned long *strmap) { - unsigned int i; + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; + char *s; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1878,13 +1969,44 @@ static void add_kallsyms(struct module *mod, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + + mod->core_symtab = dst = mod->module_core + symoffs; + src = mod->symtab; + *dst = *src; + for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { + if (!is_core_symbol(src, sechdrs, shnum)) + continue; + dst[ndst] = *src; + dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); + ++ndst; + } + mod->core_num_syms = ndst; + + mod->core_strtab = s = mod->module_core + stroffs; + for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + if (test_bit(i, strmap)) + *++s = mod->strtab[i]; } #else +static inline unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + unsigned int strindex, + const Elf_Hdr *hdr, + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) +{ +} static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, - const char *secstrings) + unsigned long symoffs, + unsigned long stroffs, + const char *secstrings, + const unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -1959,6 +2081,9 @@ static noinline struct module *load_module(void __user *umod, struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +#ifdef CONFIG_KALLSYMS + unsigned long symoffs, stroffs, *strmap; +#endif mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2040,11 +2165,6 @@ static noinline struct module *load_module(void __user *umod, /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2080,6 +2200,13 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } + strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + * sizeof(long), GFP_KERNEL); + if (!strmap) { + err = -ENOMEM; + goto free_mod; + } + if (find_module(mod->name)) { err = -EEXIST; goto free_mod; @@ -2109,6 +2236,8 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, + secstrings, &stroffs, strmap); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2313,7 +2442,10 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, + symoffs, stroffs, secstrings, strmap); + kfree(strmap); + strmap = NULL; if (!mod->taints) { struct _ddebug *debug; @@ -2385,13 +2517,14 @@ static noinline struct module *load_module(void __user *umod, synchronize_sched(); module_arch_cleanup(mod); cleanup: + free_modinfo(mod); kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - free_init: percpu_modfree(mod->refptr); + free_init: #endif module_free(mod, mod->module_init); free_core: @@ -2402,6 +2535,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modfree(percpu); free_mod: kfree(args); + kfree(strmap); free_hdr: vfree(hdr); return ERR_PTR(err); @@ -2491,6 +2625,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; +#endif module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 5aa854f9e5a..2a5dfec8efe 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c @@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid) * (hence either you are in the same cgroup as task, or in an * ancestor cgroup thereof) */ -static int ns_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, struct task_struct *task) +static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, + struct task_struct *task, bool threadgroup) { if (current != task) { if (!capable(CAP_SYS_ADMIN)) @@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss, if (!cgroup_is_descendant(new_cgroup, task)) return -EPERM; + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &task->thread_group, thread_group) { + if (!cgroup_is_descendant(new_cgroup, c)) { + rcu_read_unlock(); + return -EPERM; + } + } + rcu_read_unlock(); + } + return 0; } diff --git a/kernel/params.c b/kernel/params.c index 7f6912ced2b..9da58eabdcb 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -23,6 +23,7 @@ #include <linux/device.h> #include <linux/err.h> #include <linux/slab.h> +#include <linux/ctype.h> #if 0 #define DEBUGP printk @@ -87,7 +88,7 @@ static char *next_arg(char *args, char **param, char **val) } for (i = 0; args[i]; i++) { - if (args[i] == ' ' && !in_quote) + if (isspace(args[i]) && !in_quote) break; if (equals == 0) { if (args[i] == '=') @@ -121,7 +122,7 @@ static char *next_arg(char *args, char **param, char **val) next = args + i; /* Chew up trailing spaces. */ - while (*next == ' ') + while (isspace(*next)) next++; return next; } @@ -138,7 +139,7 @@ int parse_args(const char *name, DEBUGP("Parsing ARGS: %s\n", args); /* Chew leading spaces */ - while (*args == ' ') + while (isspace(*args)) args++; while (*args) { diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 821722ae58a..86b3796b043 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -118,7 +118,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old { if (!(flags & CLONE_NEWPID)) return get_pid_ns(old_ns); - if (flags & CLONE_THREAD) + if (flags & (CLONE_THREAD|CLONE_PARENT)) return ERR_PTR(-EINVAL); return create_pid_namespace(old_ns); } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8ba052c86d4..b101cdc4df3 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/file.h> -#include <linux/utsname.h> #include <linux/delay.h> #include <linux/bitops.h> #include <linux/genhd.h> diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 307c285af59..23bd09cd042 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -266,9 +266,10 @@ static int ignoring_children(struct sighand_struct *sigh) * or self-reaping. Do notification now if it would have happened earlier. * If it should reap itself, return true. * - * If it's our own child, there is no notification to do. - * But if our normal children self-reap, then this child - * was prevented by ptrace and we must reap it now. + * If it's our own child, there is no notification to do. But if our normal + * children self-reap, then this child was prevented by ptrace and we must + * reap it now, in that case we must also wake up sub-threads sleeping in + * do_wait(). */ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) { @@ -278,8 +279,10 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) if (!task_detached(p) && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, tracer)) do_notify_parent(p, p->exit_signal); - else if (ignoring_children(tracer->sighand)) + else if (ignoring_children(tracer->sighand)) { + __wake_up_parent(p, tracer); p->exit_signal = -1; + } } if (task_detached(p)) { /* Mark it as in the process of being reaped. */ diff --git a/kernel/res_counter.c b/kernel/res_counter.c index e1338f07431..88faec23e83 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -19,6 +19,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent) { spin_lock_init(&counter->lock); counter->limit = RESOURCE_MAX; + counter->soft_limit = RESOURCE_MAX; counter->parent = parent; } @@ -36,17 +37,27 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) } int res_counter_charge(struct res_counter *counter, unsigned long val, - struct res_counter **limit_fail_at) + struct res_counter **limit_fail_at, + struct res_counter **soft_limit_fail_at) { int ret; unsigned long flags; struct res_counter *c, *u; *limit_fail_at = NULL; + if (soft_limit_fail_at) + *soft_limit_fail_at = NULL; local_irq_save(flags); for (c = counter; c != NULL; c = c->parent) { spin_lock(&c->lock); ret = res_counter_charge_locked(c, val); + /* + * With soft limits, we return the highest ancestor + * that exceeds its soft limit + */ + if (soft_limit_fail_at && + !res_counter_soft_limit_check_locked(c)) + *soft_limit_fail_at = c; spin_unlock(&c->lock); if (ret < 0) { *limit_fail_at = c; @@ -74,7 +85,8 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) counter->usage -= val; } -void res_counter_uncharge(struct res_counter *counter, unsigned long val) +void res_counter_uncharge(struct res_counter *counter, unsigned long val, + bool *was_soft_limit_excess) { unsigned long flags; struct res_counter *c; @@ -82,6 +94,9 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val) local_irq_save(flags); for (c = counter; c != NULL; c = c->parent) { spin_lock(&c->lock); + if (was_soft_limit_excess) + *was_soft_limit_excess = + !res_counter_soft_limit_check_locked(c); res_counter_uncharge_locked(c, val); spin_unlock(&c->lock); } @@ -101,6 +116,8 @@ res_counter_member(struct res_counter *counter, int member) return &counter->limit; case RES_FAILCNT: return &counter->failcnt; + case RES_SOFT_LIMIT: + return &counter->soft_limit; }; BUG(); diff --git a/kernel/sched.c b/kernel/sched.c index 2f76e06bea5..ee61f454a98 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void) #endif /* CONFIG_RT_GROUP_SCHED */ int sched_rt_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write, old_period = sysctl_sched_rt_period; old_runtime = sysctl_sched_rt_runtime; - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (!ret && write) { ret = sched_rt_global_constraints(); @@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) } static int -cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *tsk) +cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { #ifdef CONFIG_RT_GROUP_SCHED if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) @@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, if (tsk->sched_class != &fair_sched_class) return -EINVAL; #endif + return 0; +} +static int +cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup) +{ + int retval = cpu_cgroup_can_attach_task(cgrp, tsk); + if (retval) + return retval; + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + retval = cpu_cgroup_can_attach_task(cgrp, c); + if (retval) { + rcu_read_unlock(); + return retval; + } + } + rcu_read_unlock(); + } return 0; } static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cont, struct task_struct *tsk) + struct cgroup *old_cont, struct task_struct *tsk, + bool threadgroup) { sched_move_task(tsk); + if (threadgroup) { + struct task_struct *c; + rcu_read_lock(); + list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { + sched_move_task(c); + } + rcu_read_unlock(); + } } #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ecc637a0d59..4e777b47eed 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) #ifdef CONFIG_SCHED_DEBUG int sched_nr_latency_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; diff --git a/kernel/signal.c b/kernel/signal.c index 64c5deeaca5..6705320784f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -705,7 +705,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) if (why) { /* - * The first thread which returns from finish_stop() + * The first thread which returns from do_signal_stop() * will take ->siglock, notice SIGNAL_CLD_MASK, and * notify its parent. See get_signal_to_deliver(). */ @@ -971,6 +971,20 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) return send_signal(sig, info, t, 0); } +int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, + bool group) +{ + unsigned long flags; + int ret = -ESRCH; + + if (lock_task_sighand(p, &flags)) { + ret = send_signal(sig, info, p, group); + unlock_task_sighand(p, &flags); + } + + return ret; +} + /* * Force a signal that the process can't ignore: if necessary * we unblock the signal and change any SIG_IGN to SIG_DFL. @@ -1036,12 +1050,6 @@ void zap_other_threads(struct task_struct *p) } } -int __fatal_signal_pending(struct task_struct *tsk) -{ - return sigismember(&tsk->pending.signal, SIGKILL); -} -EXPORT_SYMBOL(__fatal_signal_pending); - struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) { struct sighand_struct *sighand; @@ -1068,18 +1076,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long */ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { - unsigned long flags; - int ret; + int ret = check_kill_permission(sig, info, p); - ret = check_kill_permission(sig, info, p); - - if (!ret && sig) { - ret = -ESRCH; - if (lock_task_sighand(p, &flags)) { - ret = __group_send_sig_info(sig, info, p); - unlock_task_sighand(p, &flags); - } - } + if (!ret && sig) + ret = do_send_sig_info(sig, info, p, true); return ret; } @@ -1224,15 +1224,9 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid) * These are for backward compatibility with the rest of the kernel source. */ -/* - * The caller must ensure the task can't exit. - */ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { - int ret; - unsigned long flags; - /* * Make sure legacy kernel users don't send in bad values * (normal paths check this in check_kill_permission). @@ -1240,10 +1234,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p) if (!valid_signal(sig)) return -EINVAL; - spin_lock_irqsave(&p->sighand->siglock, flags); - ret = specific_send_sig_info(sig, info, p); - spin_unlock_irqrestore(&p->sighand->siglock, flags); - return ret; + return do_send_sig_info(sig, info, p, false); } #define __si_special(priv) \ @@ -1383,15 +1374,6 @@ ret: } /* - * Wake up any threads in the parent blocked in wait* syscalls. - */ -static inline void __wake_up_parent(struct task_struct *p, - struct task_struct *parent) -{ - wake_up_interruptible_sync(&parent->signal->wait_chldexit); -} - -/* * Let a parent know about the death of a child. * For a stopped/continued status change, use do_notify_parent_cldstop instead. * @@ -1673,29 +1655,6 @@ void ptrace_notify(int exit_code) spin_unlock_irq(¤t->sighand->siglock); } -static void -finish_stop(int stop_count) -{ - /* - * If there are no other threads in the group, or if there is - * a group stop in progress and we are the last to stop, - * report to the parent. When ptraced, every thread reports itself. - */ - if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current, CLD_STOPPED); - read_unlock(&tasklist_lock); - } - - do { - schedule(); - } while (try_to_freeze()); - /* - * Now we don't run again until continued. - */ - current->exit_code = 0; -} - /* * This performs the stopping for SIGSTOP and other stop signals. * We have to stop all threads in the thread group. @@ -1705,15 +1664,9 @@ finish_stop(int stop_count) static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - int stop_count; + int notify; - if (sig->group_stop_count > 0) { - /* - * There is a group stop in progress. We don't need to - * start another one. - */ - stop_count = --sig->group_stop_count; - } else { + if (!sig->group_stop_count) { struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1725,7 +1678,7 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; - stop_count = 0; + sig->group_stop_count = 1; for (t = next_thread(current); t != current; t = next_thread(t)) /* * Setting state to TASK_STOPPED for a group @@ -1734,19 +1687,44 @@ static int do_signal_stop(int signr) */ if (!(t->flags & PF_EXITING) && !task_is_stopped_or_traced(t)) { - stop_count++; + sig->group_stop_count++; signal_wake_up(t, 0); } - sig->group_stop_count = stop_count; } + /* + * If there are no other threads in the group, or if there is + * a group stop in progress and we are the last to stop, report + * to the parent. When ptraced, every thread reports itself. + */ + notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; + notify = tracehook_notify_jctl(notify, CLD_STOPPED); + /* + * tracehook_notify_jctl() can drop and reacquire siglock, so + * we keep ->group_stop_count != 0 before the call. If SIGCONT + * or SIGKILL comes in between ->group_stop_count == 0. + */ + if (sig->group_stop_count) { + if (!--sig->group_stop_count) + sig->flags = SIGNAL_STOP_STOPPED; + current->exit_code = sig->group_exit_code; + __set_current_state(TASK_STOPPED); + } + spin_unlock_irq(¤t->sighand->siglock); - if (stop_count == 0) - sig->flags = SIGNAL_STOP_STOPPED; - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); + if (notify) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current, notify); + read_unlock(&tasklist_lock); + } + + /* Now we don't run again until woken by SIGCONT or SIGKILL */ + do { + schedule(); + } while (try_to_freeze()); + + tracehook_finish_jctl(); + current->exit_code = 0; - spin_unlock_irq(¤t->sighand->siglock); - finish_stop(stop_count); return 1; } @@ -1815,14 +1793,15 @@ relock: int why = (signal->flags & SIGNAL_STOP_CONTINUED) ? CLD_CONTINUED : CLD_STOPPED; signal->flags &= ~SIGNAL_CLD_MASK; - spin_unlock_irq(&sighand->siglock); - if (unlikely(!tracehook_notify_jctl(1, why))) - goto relock; + why = tracehook_notify_jctl(why, CLD_CONTINUED); + spin_unlock_irq(&sighand->siglock); - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current->group_leader, why); - read_unlock(&tasklist_lock); + if (why) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, why); + read_unlock(&tasklist_lock); + } goto relock; } @@ -1987,14 +1966,14 @@ void exit_signals(struct task_struct *tsk) if (unlikely(tsk->signal->group_stop_count) && !--tsk->signal->group_stop_count) { tsk->signal->flags = SIGNAL_STOP_STOPPED; - group_stop = 1; + group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); } out: spin_unlock_irq(&tsk->sighand->siglock); - if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) { + if (unlikely(group_stop)) { read_lock(&tasklist_lock); - do_notify_parent_cldstop(tsk, CLD_STOPPED); + do_notify_parent_cldstop(tsk, group_stop); read_unlock(&tasklist_lock); } } @@ -2290,7 +2269,6 @@ static int do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) { struct task_struct *p; - unsigned long flags; int error = -ESRCH; rcu_read_lock(); @@ -2300,14 +2278,16 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. - * - * If lock_task_sighand() fails we pretend the task dies - * after receiving the signal. The window is tiny, and the - * signal is private anyway. */ - if (!error && sig && lock_task_sighand(p, &flags)) { - error = specific_send_sig_info(sig, info, p); - unlock_task_sighand(p, &flags); + if (!error && sig) { + error = do_send_sig_info(sig, info, p, false); + /* + * If lock_task_sighand() failed we pretend the task + * dies after receiving the signal. The window is tiny, + * and the signal is private anyway. + */ + if (unlikely(error == -ESRCH)) + error = 0; } } rcu_read_unlock(); diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 09d7519557d..0d31135efbf 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long); static void slow_work_oom_timeout(unsigned long); #ifdef CONFIG_SYSCTL -static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *, +static int slow_work_min_threads_sysctl(struct ctl_table *, int, void __user *, size_t *, loff_t *); -static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *, +static int slow_work_max_threads_sysctl(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif @@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data) * Handle adjustment of the minimum number of threads */ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); int n; if (ret == 0) { @@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, * Handle adjustment of the maximum number of threads */ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); int n; if (ret == 0) { diff --git a/kernel/smp.c b/kernel/smp.c index fd47a256a24..c9d1c7835c2 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -347,13 +347,6 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, generic_exec_single(cpu, data, wait); } -/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ - -#ifndef arch_send_call_function_ipi_mask -# define arch_send_call_function_ipi_mask(maskp) \ - arch_send_call_function_ipi(*(maskp)) -#endif - /** * smp_call_function_many(): Run a function on a set of other CPUs. * @mask: The set of cpus to run on (only runs on online subset). diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 88796c33083..81324d12eb3 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void) EXPORT_SYMBOL(touch_all_softlockup_watchdogs); int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { touch_all_softlockup_watchdogs(); - return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } /* diff --git a/kernel/sys.c b/kernel/sys.c index ebcb1561172..255475d163e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, current->timer_slack_ns = arg2; error = 0; break; + case PR_MCE_KILL: + if (arg4 | arg5) + return -EINVAL; + switch (arg2) { + case 0: + if (arg3 != 0) + return -EINVAL; + current->flags &= ~PF_MCE_PROCESS; + break; + case 1: + current->flags |= PF_MCE_PROCESS; + if (arg3 != 0) + current->flags |= PF_MCE_EARLY; + else + current->flags &= ~PF_MCE_EARLY; + break; + default: + return -EINVAL; + } + error = 0; + break; + default: error = -EINVAL; break; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 515bc230ac2..e06d0b8d195 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -49,6 +49,7 @@ cond_syscall(sys_sendmsg); cond_syscall(compat_sys_sendmsg); cond_syscall(sys_recvmsg); cond_syscall(compat_sys_recvmsg); +cond_syscall(compat_sys_recvfrom); cond_syscall(sys_socketcall); cond_syscall(sys_futex); cond_syscall(compat_sys_futex); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0dfaa47d7cb..0d949c51741 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -26,7 +26,6 @@ #include <linux/proc_fs.h> #include <linux/security.h> #include <linux/ctype.h> -#include <linux/utsname.h> #include <linux/kmemcheck.h> #include <linux/smp_lock.h> #include <linux/fs.h> @@ -77,6 +76,7 @@ extern int max_threads; extern int core_uses_pid; extern int suid_dumpable; extern char core_pattern[]; +extern unsigned int core_pipe_limit; extern int pid_max; extern int min_free_kbytes; extern int pid_max_min, pid_max_max; @@ -163,9 +163,9 @@ extern int max_lock_depth; #endif #ifdef CONFIG_PROC_SYSCTL -static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static int proc_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -424,6 +424,14 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "core_pipe_limit", + .data = &core_pipe_limit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -1390,6 +1398,31 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &scan_unevictable_handler, }, +#ifdef CONFIG_MEMORY_FAILURE + { + .ctl_name = CTL_UNNUMBERED, + .procname = "memory_failure_early_kill", + .data = &sysctl_memory_failure_early_kill, + .maxlen = sizeof(sysctl_memory_failure_early_kill), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "memory_failure_recovery", + .data = &sysctl_memory_failure_recovery, + .maxlen = sizeof(sysctl_memory_failure_recovery), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, +#endif + /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt @@ -2218,7 +2251,7 @@ void sysctl_head_put(struct ctl_table_header *head) #ifdef CONFIG_PROC_SYSCTL static int _proc_do_string(void* data, int maxlen, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { size_t len; @@ -2279,7 +2312,6 @@ static int _proc_do_string(void* data, int maxlen, int write, * proc_dostring - read a string sysctl * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2293,10 +2325,10 @@ static int _proc_do_string(void* data, int maxlen, int write, * * Returns 0 on success. */ -int proc_dostring(struct ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return _proc_do_string(table->data, table->maxlen, write, filp, + return _proc_do_string(table->data, table->maxlen, write, buffer, lenp, ppos); } @@ -2321,7 +2353,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, } static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, - int write, struct file *filp, void __user *buffer, + int write, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), @@ -2428,13 +2460,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, #undef TMPBUFLEN } -static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp, +static int do_proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, int (*conv)(int *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { - return __do_proc_dointvec(table->data, table, write, filp, + return __do_proc_dointvec(table->data, table, write, buffer, lenp, ppos, conv, data); } @@ -2442,7 +2474,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil * proc_dointvec - read a vector of integers * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2452,10 +2483,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil * * Returns 0 on success. */ -int proc_dointvec(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, NULL,NULL); } @@ -2463,7 +2494,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, * Taint values can only be increased * This means we can safely use a temporary. */ -static int proc_taint(struct ctl_table *table, int write, struct file *filp, +static int proc_taint(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; @@ -2475,7 +2506,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp, t = *table; t.data = &tmptaint; - err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos); + err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); if (err < 0) return err; @@ -2527,7 +2558,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * proc_dointvec_minmax - read a vector of integers with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2540,19 +2570,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { .min = (int *) table->extra1, .max = (int *) table->extra2, }; - return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, + return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_minmax_conv, ¶m); } static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, @@ -2657,21 +2686,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int } static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { return __do_proc_doulongvec_minmax(table->data, table, write, - filp, buffer, lenp, ppos, convmul, convdiv); + buffer, lenp, ppos, convmul, convdiv); } /** * proc_doulongvec_minmax - read a vector of long integers with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2684,17 +2711,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); + return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); } /** * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2709,11 +2735,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp * Returns 0 on success. */ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_doulongvec_minmax(table, write, filp, buffer, + return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, HZ, 1000l); } @@ -2789,7 +2814,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * proc_dointvec_jiffies - read a vector of integers as seconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2801,10 +2825,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, * * Returns 0 on success. */ -int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_jiffies_conv,NULL); } @@ -2812,7 +2836,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: pointer to the file position @@ -2824,10 +2847,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, * * Returns 0 on success. */ -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, + return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_userhz_jiffies_conv,NULL); } @@ -2835,7 +2858,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds * @table: the sysctl table * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer * @ppos: file position @@ -2848,14 +2870,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file * * Returns 0 on success. */ -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, + return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_ms_jiffies_conv, NULL); } -static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, +static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct pid *new_pid; @@ -2864,7 +2886,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp tmp = pid_vnr(cad_pid); - r = __do_proc_dointvec(&tmp, table, write, filp, buffer, + r = __do_proc_dointvec(&tmp, table, write, buffer, lenp, ppos, NULL, NULL); if (r || !write) return r; @@ -2879,50 +2901,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp #else /* CONFIG_PROC_FS */ -int proc_dostring(struct ctl_table *table, int write, struct file *filp, +int proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } -int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, +int proc_doulongvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 0b0a6366c9d..ee266620b06 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,4 +1,4 @@ -obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o +obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c new file mode 100644 index 00000000000..86628e755f3 --- /dev/null +++ b/kernel/time/timeconv.c @@ -0,0 +1,127 @@ +/* + * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. + * This file is part of the GNU C Library. + * Contributed by Paul Eggert (eggert@twinsun.com). + * + * The GNU C Library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * The GNU C Library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with the GNU C Library; see the file COPYING.LIB. If not, + * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* + * Converts the calendar time to broken-down time representation + * Based on code from glibc-2.6 + * + * 2009-7-14: + * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com> + */ + +#include <linux/time.h> +#include <linux/module.h> + +/* + * Nonzero if YEAR is a leap year (every 4 years, + * except every 100th isn't, and every 400th is). + */ +static int __isleap(long year) +{ + return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0); +} + +/* do a mathdiv for long type */ +static long math_div(long a, long b) +{ + return a / b - (a % b < 0); +} + +/* How many leap years between y1 and y2, y1 must less or equal to y2 */ +static long leaps_between(long y1, long y2) +{ + long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100) + + math_div(y1 - 1, 400); + long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100) + + math_div(y2 - 1, 400); + return leaps2 - leaps1; +} + +/* How many days come before each month (0-12). */ +static const unsigned short __mon_yday[2][13] = { + /* Normal years. */ + {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, + /* Leap years. */ + {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} +}; + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/** + * time_to_tm - converts the calendar time to local broken-down time + * + * @totalsecs the number of seconds elapsed since 00:00:00 on January 1, 1970, + * Coordinated Universal Time (UTC). + * @offset offset seconds adding to totalsecs. + * @result pointer to struct tm variable to receive broken-down time + */ +void time_to_tm(time_t totalsecs, int offset, struct tm *result) +{ + long days, rem, y; + const unsigned short *ip; + + days = totalsecs / SECS_PER_DAY; + rem = totalsecs % SECS_PER_DAY; + rem += offset; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + + result->tm_hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + result->tm_min = rem / 60; + result->tm_sec = rem % 60; + + /* January 1, 1970 was a Thursday. */ + result->tm_wday = (4 + days) % 7; + if (result->tm_wday < 0) + result->tm_wday += 7; + + y = 1970; + + while (days < 0 || days >= (__isleap(y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long yg = y + math_div(days, 365); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= (yg - y) * 365 + leaps_between(y, yg); + y = yg; + } + + result->tm_year = y - 1900; + + result->tm_yday = days; + + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < ip[y]; y--) + continue; + days -= ip[y]; + + result->tm_mon = y; + result->tm_mday = days + 1; +} +EXPORT_SYMBOL(time_to_tm); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 23df7771c93..a142579765b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) int ftrace_enable_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_lock(&ftrace_lock); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6c0f6a8a22e..411af37f4be 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1984,11 +1984,9 @@ __tracing_open(struct inode *inode, struct file *file) if (current_trace) *iter->trace = *current_trace; - if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) + if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - cpumask_clear(iter->started); - if (current_trace && current_trace->print_max) iter->tr = &max_tr; else @@ -4389,7 +4387,7 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; - if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) goto out_free_tracing_cpumask; /* To save memory, keep the ring buffer size to its minimum */ @@ -4400,7 +4398,6 @@ __init static int tracer_alloc_buffers(void) cpumask_copy(tracing_buffer_mask, cpu_possible_mask); cpumask_copy(tracing_cpumask, cpu_all_mask); - cpumask_clear(tracing_reader_cpumask); /* TODO: make the number of buffers hot pluggable with CPUS */ global_trace.buffer = ring_buffer_alloc(ring_buf_size, diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0f6facb050a..8504ac71e4e 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = { int stack_trace_sysctl(struct ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; mutex_lock(&stack_sysctl_mutex); - ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret || !write || (last_stack_tracer_enabled == !!stack_tracer_enabled)) diff --git a/kernel/uid16.c b/kernel/uid16.c index 0314501688b..419209893d8 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -4,7 +4,6 @@ */ #include <linux/mm.h> -#include <linux/utsname.h> #include <linux/mman.h> #include <linux/notifier.h> #include <linux/reboot.h> diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 92359cc747a..69eae358a72 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which) * Special case of dostring for the UTS structure. This has locks * to observe. Should this be in kernel/sys.c ???? */ -static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, +static int proc_do_uts_string(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table uts_table; int r; memcpy(&uts_table, table, sizeof(uts_table)); uts_table.data = get_uts(table, write); - r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos); + r = proc_dostring(&uts_table,write,buffer,lenp, ppos); put_uts(table, write, uts_table.data); return r; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d57b12f59c8..891155817bc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -50,6 +50,14 @@ config MAGIC_SYSRQ keys are documented in <file:Documentation/sysrq.txt>. Don't say Y unless you really know what this hack does. +config STRIP_ASM_SYMS + bool "Strip assembler-generated symbols during link" + default n + help + Strip internal assembler-generated symbols during a link (symbols + that look like '.Lxxx') so they don't pollute the output of + get_wchan() and suchlike. + config UNUSED_SYMBOLS bool "Enable unused/obsolete exported symbols" default y if X86 diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 68dfce59c1b..fc686c7a0a0 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -27,6 +27,11 @@ #define GZIP_IOBUF_SIZE (16*1024) +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + /* Included from initramfs et al code */ STATIC int INIT gunzip(unsigned char *buf, int len, int(*fill)(void*, unsigned int), @@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len, goto gunzip_nomem4; } + if (!fill) + fill = nofill; + if (len == 0) len = fill(zbuf, GZIP_IOBUF_SIZE); diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 0b954e04bd3..ca82fde81c8 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -82,6 +82,11 @@ struct rc { #define RC_MODEL_TOTAL_BITS 11 +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + /* Called twice: once at startup and once in rc_normalize() */ static void INIT rc_read(struct rc *rc) { @@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc, int (*fill)(void*, unsigned int), char *buffer, int buffer_size) { - rc->fill = fill; + if (fill) + rc->fill = fill; + else + rc->fill = nofill; rc->buffer = (uint8_t *)buffer; rc->buffer_size = buffer_size; rc->buffer_end = rc->buffer + rc->buffer_size; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 73a14b8c6d1..b91839e9e89 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -671,7 +671,7 @@ static char *ip4_string(char *p, const u8 *addr, bool leading_zeros) return p; } -static char *ip6_compressed_string(char *p, const struct in6_addr *addr) +static char *ip6_compressed_string(char *p, const char *addr) { int i; int j; @@ -683,7 +683,12 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr) u8 hi; u8 lo; bool needcolon = false; - bool useIPv4 = ipv6_addr_v4mapped(addr) || ipv6_addr_is_isatap(addr); + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); memset(zerolength, 0, sizeof(zerolength)); @@ -695,7 +700,7 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr) /* find position of longest 0 run */ for (i = 0; i < range; i++) { for (j = i; j < range; j++) { - if (addr->s6_addr16[j] != 0) + if (in6.s6_addr16[j] != 0) break; zerolength[i]++; } @@ -722,7 +727,7 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr) needcolon = false; } /* hex u16 without leading 0s */ - word = ntohs(addr->s6_addr16[i]); + word = ntohs(in6.s6_addr16[i]); hi = word >> 8; lo = word & 0xff; if (hi) { @@ -741,19 +746,19 @@ static char *ip6_compressed_string(char *p, const struct in6_addr *addr) if (useIPv4) { if (needcolon) *p++ = ':'; - p = ip4_string(p, &addr->s6_addr[12], false); + p = ip4_string(p, &in6.s6_addr[12], false); } *p = '\0'; return p; } -static char *ip6_string(char *p, const struct in6_addr *addr, const char *fmt) +static char *ip6_string(char *p, const char *addr, const char *fmt) { int i; for (i = 0; i < 8; i++) { - p = pack_hex_byte(p, addr->s6_addr[2 * i]); - p = pack_hex_byte(p, addr->s6_addr[2 * i + 1]); + p = pack_hex_byte(p, *addr++); + p = pack_hex_byte(p, *addr++); if (fmt[0] == 'I' && i != 7) *p++ = ':'; } @@ -768,9 +773,9 @@ static char *ip6_addr_string(char *buf, char *end, const u8 *addr, char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; if (fmt[0] == 'I' && fmt[2] == 'c') - ip6_compressed_string(ip6_addr, (const struct in6_addr *)addr); + ip6_compressed_string(ip6_addr, addr); else - ip6_string(ip6_addr, (const struct in6_addr *)addr, fmt); + ip6_string(ip6_addr, addr, fmt); return string(buf, end, ip6_addr, spec); } diff --git a/mm/Kconfig b/mm/Kconfig index 71eb0b4cce8..24776072959 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR /proc/sys/vm/mmap_min_addr tunable. +config MEMORY_FAILURE + depends on MMU + depends on X86_MCE + bool "Enable recovery from hardware memory errors" + help + Enables code to recover from some memory failures on systems + with MCA recovery. This allows a system to continue running + even when some of its memory has uncorrected errors. This requires + special hardware support and typically ECC memory. + +config HWPOISON_INJECT + tristate "Poison pages injector" + depends on MEMORY_FAILURE && DEBUG_KERNEL + config NOMMU_INITIAL_TRIM_EXCESS int "Turn on mmap() excess space trimming before booting" depends on !MMU diff --git a/mm/Makefile b/mm/Makefile index 88193d73cd1..515fd793c17 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o +obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o diff --git a/mm/filemap.c b/mm/filemap.c index bcc7372aebb..6c84e598b4a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -58,7 +58,7 @@ /* * Lock ordering: * - * ->i_mmap_lock (vmtruncate) + * ->i_mmap_lock (truncate_pagecache) * ->private_lock (__free_pte->__set_page_dirty_buffers) * ->swap_lock (exclusive_swap_page, others) * ->mapping->tree_lock @@ -104,6 +104,10 @@ * * ->task->proc_lock * ->dcache_lock (proc_pid_lookup) + * + * (code doesn't rely on that order, so you could switch it around) + * ->tasklist_lock (memory_failure, collect_procs_ao) + * ->i_mmap_lock */ /* diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 815dbd4a6dc..6f048fcc749 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array) #ifdef CONFIG_SYSCTL int hugetlb_sysctl_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; @@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, table->data = &tmp; table->maxlen = sizeof(unsigned long); - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write) h->max_huge_pages = set_max_huge_pages(h, tmp); @@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, } int hugetlb_treat_movable_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (hugepages_treat_as_movable) htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; else @@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write, } int hugetlb_overcommit_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; @@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, table->data = &tmp; table->maxlen = sizeof(unsigned long); - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write) { spin_lock(&hugetlb_lock); diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c new file mode 100644 index 00000000000..e1d85137f08 --- /dev/null +++ b/mm/hwpoison-inject.c @@ -0,0 +1,41 @@ +/* Inject a hwpoison memory failure on a arbitary pfn */ +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +static struct dentry *hwpoison_dir, *corrupt_pfn; + +static int hwpoison_inject(void *data, u64 val) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val); + return __memory_failure(val, 18, 0); +} + +DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n"); + +static void pfn_inject_exit(void) +{ + if (hwpoison_dir) + debugfs_remove_recursive(hwpoison_dir); +} + +static int pfn_inject_init(void) +{ + hwpoison_dir = debugfs_create_dir("hwpoison", NULL); + if (hwpoison_dir == NULL) + return -ENOMEM; + corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir, + NULL, &hwpoison_fops); + if (corrupt_pfn == NULL) { + pfn_inject_exit(); + return -ENOMEM; + } + return 0; +} + +module_init(pfn_inject_init); +module_exit(pfn_inject_exit); +MODULE_LICENSE("GPL"); @@ -30,6 +30,7 @@ #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/mmu_notifier.h> +#include <linux/swap.h> #include <linux/ksm.h> #include <asm/tlbflush.h> @@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared; static unsigned long ksm_rmap_items; /* Limit on the number of unswappable pages used */ -static unsigned long ksm_max_kernel_pages = 2000; +static unsigned long ksm_max_kernel_pages; /* Number of pages ksmd should scan in one batch */ -static unsigned int ksm_thread_pages_to_scan = 200; +static unsigned int ksm_thread_pages_to_scan = 100; /* Milliseconds ksmd should sleep between batches */ static unsigned int ksm_thread_sleep_millisecs = 20; @@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20; #define KSM_RUN_STOP 0 #define KSM_RUN_MERGE 1 #define KSM_RUN_UNMERGE 2 -static unsigned int ksm_run = KSM_RUN_MERGE; +static unsigned int ksm_run = KSM_RUN_STOP; static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); static DEFINE_MUTEX(ksm_thread_mutex); @@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock); sizeof(struct __struct), __alignof__(struct __struct),\ (__flags), NULL) +static void __init ksm_init_max_kernel_pages(void) +{ + ksm_max_kernel_pages = nr_free_buffer_pages() / 4; +} + static int __init ksm_slab_init(void) { rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); @@ -1667,6 +1673,8 @@ static int __init ksm_init(void) struct task_struct *ksm_thread; int err; + ksm_init_max_kernel_pages(); + err = ksm_slab_init(); if (err) goto out; diff --git a/mm/madvise.c b/mm/madvise.c index d9ae2067952..35b1479b7c9 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma, return error; } +#ifdef CONFIG_MEMORY_FAILURE +/* + * Error injection support for memory error handling. + */ +static int madvise_hwpoison(unsigned long start, unsigned long end) +{ + int ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + for (; start < end; start += PAGE_SIZE) { + struct page *p; + int ret = get_user_pages(current, current->mm, start, 1, + 0, 0, &p, NULL); + if (ret != 1) + return ret; + printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", + page_to_pfn(p), start); + /* Ignore return value for now */ + __memory_failure(page_to_pfn(p), 0, 1); + put_page(p); + } + return ret; +} +#endif + static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) @@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) int write; size_t len; +#ifdef CONFIG_MEMORY_FAILURE + if (behavior == MADV_HWPOISON) + return madvise_hwpoison(start, start+len_in); +#endif if (!madvise_behavior_valid(behavior)) return error; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9b10d875378..e2b98a6875c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -29,6 +29,7 @@ #include <linux/rcupdate.h> #include <linux/limits.h> #include <linux/mutex.h> +#include <linux/rbtree.h> #include <linux/slab.h> #include <linux/swap.h> #include <linux/spinlock.h> @@ -43,6 +44,7 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 +struct mem_cgroup *root_mem_cgroup __read_mostly; #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ @@ -53,6 +55,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/ #endif static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */ +#define SOFTLIMIT_EVENTS_THRESH (1000) /* * Statistics for memory cgroup. @@ -66,6 +69,8 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ + MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ + MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, }; @@ -78,6 +83,20 @@ struct mem_cgroup_stat { struct mem_cgroup_stat_cpu cpustat[0]; }; +static inline void +__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat, + enum mem_cgroup_stat_index idx) +{ + stat->count[idx] = 0; +} + +static inline s64 +__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat, + enum mem_cgroup_stat_index idx) +{ + return stat->count[idx]; +} + /* * For accounting under irq disable, no need for increment preempt count. */ @@ -117,6 +136,12 @@ struct mem_cgroup_per_zone { unsigned long count[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; + struct rb_node tree_node; /* RB tree node */ + unsigned long long usage_in_excess;/* Set to the value by which */ + /* the soft limit is exceeded*/ + bool on_tree; + struct mem_cgroup *mem; /* Back pointer, we cannot */ + /* use container_of */ }; /* Macro for accessing counter */ #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) @@ -130,6 +155,26 @@ struct mem_cgroup_lru_info { }; /* + * Cgroups above their limits are maintained in a RB-Tree, independent of + * their hierarchy representation + */ + +struct mem_cgroup_tree_per_zone { + struct rb_root rb_root; + spinlock_t lock; +}; + +struct mem_cgroup_tree_per_node { + struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; +}; + +struct mem_cgroup_tree { + struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; +}; + +static struct mem_cgroup_tree soft_limit_tree __read_mostly; + +/* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide * statistics based on the statistics developed by Rik Van Riel for clock-pro, @@ -186,6 +231,13 @@ struct mem_cgroup { struct mem_cgroup_stat stat; }; +/* + * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft + * limit reclaim to prevent infinite loops, if they ever occur. + */ +#define MEM_CGROUP_MAX_RECLAIM_LOOPS (100) +#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2) + enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, MEM_CGROUP_CHARGE_TYPE_MAPPED, @@ -200,13 +252,8 @@ enum charge_type { #define PCGF_CACHE (1UL << PCG_CACHE) #define PCGF_USED (1UL << PCG_USED) #define PCGF_LOCK (1UL << PCG_LOCK) -static const unsigned long -pcg_default_flags[NR_CHARGE_TYPE] = { - PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */ - PCGF_USED | PCGF_LOCK, /* Anon */ - PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */ - 0, /* FORCE */ -}; +/* Not used, but added here for completeness */ +#define PCGF_ACCT (1UL << PCG_ACCT) /* for encoding cft->private value on file */ #define _MEM (0) @@ -215,15 +262,241 @@ pcg_default_flags[NR_CHARGE_TYPE] = { #define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) +/* + * Reclaim flags for mem_cgroup_hierarchical_reclaim + */ +#define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0 +#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT) +#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1 +#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT) +#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2 +#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT) + static void mem_cgroup_get(struct mem_cgroup *mem); static void mem_cgroup_put(struct mem_cgroup *mem); static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); +static struct mem_cgroup_per_zone * +mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) +{ + return &mem->info.nodeinfo[nid]->zoneinfo[zid]; +} + +static struct mem_cgroup_per_zone * +page_cgroup_zoneinfo(struct page_cgroup *pc) +{ + struct mem_cgroup *mem = pc->mem_cgroup; + int nid = page_cgroup_nid(pc); + int zid = page_cgroup_zid(pc); + + if (!mem) + return NULL; + + return mem_cgroup_zoneinfo(mem, nid, zid); +} + +static struct mem_cgroup_tree_per_zone * +soft_limit_tree_node_zone(int nid, int zid) +{ + return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; +} + +static struct mem_cgroup_tree_per_zone * +soft_limit_tree_from_page(struct page *page) +{ + int nid = page_to_nid(page); + int zid = page_zonenum(page); + + return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; +} + +static void +__mem_cgroup_insert_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + struct rb_node **p = &mctz->rb_root.rb_node; + struct rb_node *parent = NULL; + struct mem_cgroup_per_zone *mz_node; + + if (mz->on_tree) + return; + + mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); + while (*p) { + parent = *p; + mz_node = rb_entry(parent, struct mem_cgroup_per_zone, + tree_node); + if (mz->usage_in_excess < mz_node->usage_in_excess) + p = &(*p)->rb_left; + /* + * We can't avoid mem cgroups that are over their soft + * limit by the same amount + */ + else if (mz->usage_in_excess >= mz_node->usage_in_excess) + p = &(*p)->rb_right; + } + rb_link_node(&mz->tree_node, parent, p); + rb_insert_color(&mz->tree_node, &mctz->rb_root); + mz->on_tree = true; +} + +static void +__mem_cgroup_remove_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + if (!mz->on_tree) + return; + rb_erase(&mz->tree_node, &mctz->rb_root); + mz->on_tree = false; +} + +static void +mem_cgroup_insert_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + spin_lock(&mctz->lock); + __mem_cgroup_insert_exceeded(mem, mz, mctz); + spin_unlock(&mctz->lock); +} + +static void +mem_cgroup_remove_exceeded(struct mem_cgroup *mem, + struct mem_cgroup_per_zone *mz, + struct mem_cgroup_tree_per_zone *mctz) +{ + spin_lock(&mctz->lock); + __mem_cgroup_remove_exceeded(mem, mz, mctz); + spin_unlock(&mctz->lock); +} + +static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) +{ + bool ret = false; + int cpu; + s64 val; + struct mem_cgroup_stat_cpu *cpustat; + + cpu = get_cpu(); + cpustat = &mem->stat.cpustat[cpu]; + val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS); + if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) { + __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS); + ret = true; + } + put_cpu(); + return ret; +} + +static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) +{ + unsigned long long prev_usage_in_excess, new_usage_in_excess; + bool updated_tree = false; + struct mem_cgroup_per_zone *mz; + struct mem_cgroup_tree_per_zone *mctz; + + mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); + mctz = soft_limit_tree_from_page(page); + + /* + * We do updates in lazy mode, mem's are removed + * lazily from the per-zone, per-node rb tree + */ + prev_usage_in_excess = mz->usage_in_excess; + + new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); + if (prev_usage_in_excess) { + mem_cgroup_remove_exceeded(mem, mz, mctz); + updated_tree = true; + } + if (!new_usage_in_excess) + goto done; + mem_cgroup_insert_exceeded(mem, mz, mctz); + +done: + if (updated_tree) { + spin_lock(&mctz->lock); + mz->usage_in_excess = new_usage_in_excess; + spin_unlock(&mctz->lock); + } +} + +static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) +{ + int node, zone; + struct mem_cgroup_per_zone *mz; + struct mem_cgroup_tree_per_zone *mctz; + + for_each_node_state(node, N_POSSIBLE) { + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + mz = mem_cgroup_zoneinfo(mem, node, zone); + mctz = soft_limit_tree_node_zone(node, zone); + mem_cgroup_remove_exceeded(mem, mz, mctz); + } + } +} + +static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem) +{ + return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT; +} + +static struct mem_cgroup_per_zone * +__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +{ + struct rb_node *rightmost = NULL; + struct mem_cgroup_per_zone *mz = NULL; + +retry: + rightmost = rb_last(&mctz->rb_root); + if (!rightmost) + goto done; /* Nothing to reclaim from */ + + mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); + /* + * Remove the node now but someone else can add it back, + * we will to add it back at the end of reclaim to its correct + * position in the tree. + */ + __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); + if (!res_counter_soft_limit_excess(&mz->mem->res) || + !css_tryget(&mz->mem->css)) + goto retry; +done: + return mz; +} + +static struct mem_cgroup_per_zone * +mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +{ + struct mem_cgroup_per_zone *mz; + + spin_lock(&mctz->lock); + mz = __mem_cgroup_largest_soft_limit_node(mctz); + spin_unlock(&mctz->lock); + return mz; +} + +static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, + bool charge) +{ + int val = (charge) ? 1 : -1; + struct mem_cgroup_stat *stat = &mem->stat; + struct mem_cgroup_stat_cpu *cpustat; + int cpu = get_cpu(); + + cpustat = &stat->cpustat[cpu]; + __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val); + put_cpu(); +} + static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, struct page_cgroup *pc, bool charge) { - int val = (charge)? 1 : -1; + int val = (charge) ? 1 : -1; struct mem_cgroup_stat *stat = &mem->stat; struct mem_cgroup_stat_cpu *cpustat; int cpu = get_cpu(); @@ -240,28 +513,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, else __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); + __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1); put_cpu(); } -static struct mem_cgroup_per_zone * -mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) -{ - return &mem->info.nodeinfo[nid]->zoneinfo[zid]; -} - -static struct mem_cgroup_per_zone * -page_cgroup_zoneinfo(struct page_cgroup *pc) -{ - struct mem_cgroup *mem = pc->mem_cgroup; - int nid = page_cgroup_nid(pc); - int zid = page_cgroup_zid(pc); - - if (!mem) - return NULL; - - return mem_cgroup_zoneinfo(mem, nid, zid); -} - static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, enum lru_list idx) { @@ -354,6 +609,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, return ret; } +static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) +{ + return (mem == root_mem_cgroup); +} + /* * Following LRU functions are allowed to be used without PCG_LOCK. * Operations are called by routine of global LRU independently from memcg. @@ -371,22 +631,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) { struct page_cgroup *pc; - struct mem_cgroup *mem; struct mem_cgroup_per_zone *mz; if (mem_cgroup_disabled()) return; pc = lookup_page_cgroup(page); /* can happen while we handle swapcache. */ - if (list_empty(&pc->lru) || !pc->mem_cgroup) + if (!TestClearPageCgroupAcctLRU(pc)) return; + VM_BUG_ON(!pc->mem_cgroup); /* * We don't check PCG_USED bit. It's cleared when the "page" is finally * removed from global LRU. */ mz = page_cgroup_zoneinfo(pc); - mem = pc->mem_cgroup; MEM_CGROUP_ZSTAT(mz, lru) -= 1; + if (mem_cgroup_is_root(pc->mem_cgroup)) + return; + VM_BUG_ON(list_empty(&pc->lru)); list_del_init(&pc->lru); return; } @@ -410,8 +672,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) * For making pc->mem_cgroup visible, insert smp_rmb() here. */ smp_rmb(); - /* unused page is not rotated. */ - if (!PageCgroupUsed(pc)) + /* unused or root page is not rotated. */ + if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup)) return; mz = page_cgroup_zoneinfo(pc); list_move(&pc->lru, &mz->lists[lru]); @@ -425,6 +687,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) if (mem_cgroup_disabled()) return; pc = lookup_page_cgroup(page); + VM_BUG_ON(PageCgroupAcctLRU(pc)); /* * Used bit is set without atomic ops but after smp_wmb(). * For making pc->mem_cgroup visible, insert smp_rmb() here. @@ -435,6 +698,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) mz = page_cgroup_zoneinfo(pc); MEM_CGROUP_ZSTAT(mz, lru) += 1; + SetPageCgroupAcctLRU(pc); + if (mem_cgroup_is_root(pc->mem_cgroup)) + return; list_add(&pc->lru, &mz->lists[lru]); } @@ -469,7 +735,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page) spin_lock_irqsave(&zone->lru_lock, flags); /* link when the page is linked to LRU but page_cgroup isn't */ - if (PageLRU(page) && list_empty(&pc->lru)) + if (PageLRU(page) && !PageCgroupAcctLRU(pc)) mem_cgroup_add_lru_list(page, page_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } @@ -855,28 +1121,62 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) * If shrink==true, for avoiding to free too much, this returns immedieately. */ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, - gfp_t gfp_mask, bool noswap, bool shrink) + struct zone *zone, + gfp_t gfp_mask, + unsigned long reclaim_options) { struct mem_cgroup *victim; int ret, total = 0; int loop = 0; + bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; + bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; + bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; + unsigned long excess = mem_cgroup_get_excess(root_mem); /* If memsw_is_minimum==1, swap-out is of-no-use. */ if (root_mem->memsw_is_minimum) noswap = true; - while (loop < 2) { + while (1) { victim = mem_cgroup_select_victim(root_mem); - if (victim == root_mem) + if (victim == root_mem) { loop++; + if (loop >= 2) { + /* + * If we have not been able to reclaim + * anything, it might because there are + * no reclaimable pages under this hierarchy + */ + if (!check_soft || !total) { + css_put(&victim->css); + break; + } + /* + * We want to do more targetted reclaim. + * excess >> 2 is not to excessive so as to + * reclaim too much, nor too less that we keep + * coming back to reclaim from this cgroup + */ + if (total >= (excess >> 2) || + (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) { + css_put(&victim->css); + break; + } + } + } if (!mem_cgroup_local_usage(&victim->stat)) { /* this cgroup's local usage == 0 */ css_put(&victim->css); continue; } /* we use swappiness of local cgroup */ - ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap, - get_swappiness(victim)); + if (check_soft) + ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, + noswap, get_swappiness(victim), zone, + zone->zone_pgdat->node_id); + else + ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, + noswap, get_swappiness(victim)); css_put(&victim->css); /* * At shrinking usage, we can't check we should stop here or @@ -886,7 +1186,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, if (shrink) return ret; total += ret; - if (mem_cgroup_check_under_limit(root_mem)) + if (check_soft) { + if (res_counter_check_under_soft_limit(&root_mem->res)) + return total; + } else if (mem_cgroup_check_under_limit(root_mem)) return 1 + total; } return total; @@ -965,11 +1268,11 @@ done: */ static int __mem_cgroup_try_charge(struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcg, - bool oom) + bool oom, struct page *page) { - struct mem_cgroup *mem, *mem_over_limit; + struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; - struct res_counter *fail_res; + struct res_counter *fail_res, *soft_fail_res = NULL; if (unlikely(test_thread_flag(TIF_MEMDIE))) { /* Don't account this! */ @@ -996,20 +1299,23 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, VM_BUG_ON(css_is_removed(&mem->css)); while (1) { - int ret; - bool noswap = false; + int ret = 0; + unsigned long flags = 0; - ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); + if (mem_cgroup_is_root(mem)) + goto done; + ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, + &soft_fail_res); if (likely(!ret)) { if (!do_swap_account) break; ret = res_counter_charge(&mem->memsw, PAGE_SIZE, - &fail_res); + &fail_res, NULL); if (likely(!ret)) break; /* mem+swap counter fails */ - res_counter_uncharge(&mem->res, PAGE_SIZE); - noswap = true; + res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); + flags |= MEM_CGROUP_RECLAIM_NOSWAP; mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); } else @@ -1020,8 +1326,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (!(gfp_mask & __GFP_WAIT)) goto nomem; - ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, - noswap, false); + ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, + gfp_mask, flags); if (ret) continue; @@ -1046,13 +1352,24 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, goto nomem; } } + /* + * Insert just the ancestor, we should trickle down to the correct + * cgroup for reclaim, since the other nodes will be below their + * soft limit + */ + if (soft_fail_res) { + mem_over_soft_limit = + mem_cgroup_from_res_counter(soft_fail_res, res); + if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) + mem_cgroup_update_tree(mem_over_soft_limit, page); + } +done: return 0; nomem: css_put(&mem->css); return -ENOMEM; } - /* * A helper function to get mem_cgroup from ID. must be called under * rcu_read_lock(). The caller must check css_is_removed() or some if @@ -1119,15 +1436,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, lock_page_cgroup(pc); if (unlikely(PageCgroupUsed(pc))) { unlock_page_cgroup(pc); - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(mem)) { + res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); + if (do_swap_account) + res_counter_uncharge(&mem->memsw, PAGE_SIZE, + NULL); + } css_put(&mem->css); return; } + pc->mem_cgroup = mem; + /* + * We access a page_cgroup asynchronously without lock_page_cgroup(). + * Especially when a page_cgroup is taken from a page, pc->mem_cgroup + * is accessed after testing USED bit. To make pc->mem_cgroup visible + * before USED bit, we need memory barrier here. + * See mem_cgroup_add_lru_list(), etc. + */ smp_wmb(); - pc->flags = pcg_default_flags[ctype]; + switch (ctype) { + case MEM_CGROUP_CHARGE_TYPE_CACHE: + case MEM_CGROUP_CHARGE_TYPE_SHMEM: + SetPageCgroupCache(pc); + SetPageCgroupUsed(pc); + break; + case MEM_CGROUP_CHARGE_TYPE_MAPPED: + ClearPageCgroupCache(pc); + SetPageCgroupUsed(pc); + break; + default: + break; + } mem_cgroup_charge_statistics(mem, pc, true); @@ -1178,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, if (pc->mem_cgroup != from) goto out; - res_counter_uncharge(&from->res, PAGE_SIZE); + if (!mem_cgroup_is_root(from)) + res_counter_uncharge(&from->res, PAGE_SIZE, NULL); mem_cgroup_charge_statistics(from, pc, false); page = pc->page; @@ -1197,8 +1538,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, 1); } - if (do_swap_account) - res_counter_uncharge(&from->memsw, PAGE_SIZE); + if (do_swap_account && !mem_cgroup_is_root(from)) + res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); css_put(&from->css); css_get(&to->css); @@ -1238,7 +1579,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, parent = mem_cgroup_from_cont(pcg); - ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); + ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); if (ret || !parent) return ret; @@ -1268,9 +1609,11 @@ uncharge: /* drop extra refcnt by try_charge() */ css_put(&parent->css); /* uncharge if move fails */ - res_counter_uncharge(&parent->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&parent->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(parent)) { + res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); + if (do_swap_account) + res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); + } return ret; } @@ -1295,7 +1638,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, prefetchw(pc); mem = memcg; - ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); + ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page); if (ret || !mem) return ret; @@ -1414,14 +1757,14 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, if (!mem) goto charge_cur_mm; *ptr = mem; - ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); + ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page); /* drop extra refcnt from tryget */ css_put(&mem->css); return ret; charge_cur_mm: if (unlikely(!mm)) mm = &init_mm; - return __mem_cgroup_try_charge(mm, mask, ptr, true); + return __mem_cgroup_try_charge(mm, mask, ptr, true, page); } static void @@ -1459,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, * This recorded memcg can be obsolete one. So, avoid * calling css_tryget */ - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(memcg)) + res_counter_uncharge(&memcg->memsw, PAGE_SIZE, + NULL); + mem_cgroup_swap_statistics(memcg, false); mem_cgroup_put(memcg); } rcu_read_unlock(); @@ -1484,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) return; if (!mem) return; - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(mem)) { + res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); + if (do_swap_account) + res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); + } css_put(&mem->css); } @@ -1500,6 +1848,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) struct page_cgroup *pc; struct mem_cgroup *mem = NULL; struct mem_cgroup_per_zone *mz; + bool soft_limit_excess = false; if (mem_cgroup_disabled()) return NULL; @@ -1538,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) break; } - res_counter_uncharge(&mem->res, PAGE_SIZE); - if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) - res_counter_uncharge(&mem->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(mem)) { + res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); + if (do_swap_account && + (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) + res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); + } + if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) + mem_cgroup_swap_statistics(mem, true); mem_cgroup_charge_statistics(mem, pc, false); ClearPageCgroupUsed(pc); @@ -1554,6 +1908,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) mz = page_cgroup_zoneinfo(pc); unlock_page_cgroup(pc); + if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) + mem_cgroup_update_tree(mem, page); /* at swapout, this memcg will be accessed to record to swap */ if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) css_put(&mem->css); @@ -1629,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) * We uncharge this because swap is freed. * This memcg can be obsolete one. We avoid calling css_tryget */ - res_counter_uncharge(&memcg->memsw, PAGE_SIZE); + if (!mem_cgroup_is_root(memcg)) + res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); + mem_cgroup_swap_statistics(memcg, false); mem_cgroup_put(memcg); } rcu_read_unlock(); @@ -1658,7 +2016,8 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) unlock_page_cgroup(pc); if (mem) { - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false); + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, + page); css_put(&mem->css); } *ptr = mem; @@ -1798,8 +2157,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, if (!ret) break; - progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, - false, true); + progress = mem_cgroup_hierarchical_reclaim(memcg, NULL, + GFP_KERNEL, + MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->res, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -1851,7 +2211,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, if (!ret) break; - mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true); + mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, + MEM_CGROUP_RECLAIM_NOSWAP | + MEM_CGROUP_RECLAIM_SHRINK); curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); /* Usage is reduced ? */ if (curusage >= oldusage) @@ -1862,6 +2224,97 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, return ret; } +unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, + gfp_t gfp_mask, int nid, + int zid) +{ + unsigned long nr_reclaimed = 0; + struct mem_cgroup_per_zone *mz, *next_mz = NULL; + unsigned long reclaimed; + int loop = 0; + struct mem_cgroup_tree_per_zone *mctz; + + if (order > 0) + return 0; + + mctz = soft_limit_tree_node_zone(nid, zid); + /* + * This loop can run a while, specially if mem_cgroup's continuously + * keep exceeding their soft limit and putting the system under + * pressure + */ + do { + if (next_mz) + mz = next_mz; + else + mz = mem_cgroup_largest_soft_limit_node(mctz); + if (!mz) + break; + + reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, + gfp_mask, + MEM_CGROUP_RECLAIM_SOFT); + nr_reclaimed += reclaimed; + spin_lock(&mctz->lock); + + /* + * If we failed to reclaim anything from this memory cgroup + * it is time to move on to the next cgroup + */ + next_mz = NULL; + if (!reclaimed) { + do { + /* + * Loop until we find yet another one. + * + * By the time we get the soft_limit lock + * again, someone might have aded the + * group back on the RB tree. Iterate to + * make sure we get a different mem. + * mem_cgroup_largest_soft_limit_node returns + * NULL if no other cgroup is present on + * the tree + */ + next_mz = + __mem_cgroup_largest_soft_limit_node(mctz); + if (next_mz == mz) { + css_put(&next_mz->mem->css); + next_mz = NULL; + } else /* next_mz == NULL or other memcg */ + break; + } while (1); + } + mz->usage_in_excess = + res_counter_soft_limit_excess(&mz->mem->res); + __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); + /* + * One school of thought says that we should not add + * back the node to the tree if reclaim returns 0. + * But our reclaim could return 0, simply because due + * to priority we are exposing a smaller subset of + * memory to reclaim from. Consider this as a longer + * term TODO. + */ + if (mz->usage_in_excess) + __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); + spin_unlock(&mctz->lock); + css_put(&mz->mem->css); + loop++; + /* + * Could not reclaim anything and there are no more + * mem cgroups to try or we seem to be looping without + * reclaiming anything. + */ + if (!nr_reclaimed && + (next_mz == NULL || + loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS)) + break; + } while (!nr_reclaimed); + if (next_mz) + css_put(&next_mz->mem->css); + return nr_reclaimed; +} + /* * This routine traverse page_cgroup in given list and drop them all. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. @@ -2046,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, return retval; } +struct mem_cgroup_idx_data { + s64 val; + enum mem_cgroup_stat_index idx; +}; + +static int +mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) +{ + struct mem_cgroup_idx_data *d = data; + d->val += mem_cgroup_read_stat(&mem->stat, d->idx); + return 0; +} + +static void +mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, + enum mem_cgroup_stat_index idx, s64 *val) +{ + struct mem_cgroup_idx_data d; + d.idx = idx; + d.val = 0; + mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat); + *val = d.val; +} + static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { struct mem_cgroup *mem = mem_cgroup_from_cont(cont); - u64 val = 0; + u64 idx_val, val; int type, name; type = MEMFILE_TYPE(cft->private); name = MEMFILE_ATTR(cft->private); switch (type) { case _MEM: - val = res_counter_read_u64(&mem->res, name); + if (name == RES_USAGE && mem_cgroup_is_root(mem)) { + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_CACHE, &idx_val); + val = idx_val; + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_RSS, &idx_val); + val += idx_val; + val <<= PAGE_SHIFT; + } else + val = res_counter_read_u64(&mem->res, name); break; case _MEMSWAP: - val = res_counter_read_u64(&mem->memsw, name); + if (name == RES_USAGE && mem_cgroup_is_root(mem)) { + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_CACHE, &idx_val); + val = idx_val; + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_RSS, &idx_val); + val += idx_val; + mem_cgroup_get_recursive_idx_stat(mem, + MEM_CGROUP_STAT_SWAPOUT, &idx_val); + val <<= PAGE_SHIFT; + } else + val = res_counter_read_u64(&mem->memsw, name); break; default: BUG(); @@ -2083,6 +2580,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, name = MEMFILE_ATTR(cft->private); switch (name) { case RES_LIMIT: + if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ + ret = -EINVAL; + break; + } /* This function does all necessary parse...reuse it */ ret = res_counter_memparse_write_strategy(buffer, &val); if (ret) @@ -2092,6 +2593,20 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, else ret = mem_cgroup_resize_memsw_limit(memcg, val); break; + case RES_SOFT_LIMIT: + ret = res_counter_memparse_write_strategy(buffer, &val); + if (ret) + break; + /* + * For memsw, soft limits are hard to implement in terms + * of semantics, for now, we support soft limits for + * control without swap + */ + if (type == _MEM) + ret = res_counter_set_soft_limit(&memcg->res, val); + else + ret = -EINVAL; + break; default: ret = -EINVAL; /* should be BUG() ? */ break; @@ -2149,6 +2664,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) res_counter_reset_failcnt(&mem->memsw); break; } + return 0; } @@ -2160,6 +2676,7 @@ enum { MCS_MAPPED_FILE, MCS_PGPGIN, MCS_PGPGOUT, + MCS_SWAP, MCS_INACTIVE_ANON, MCS_ACTIVE_ANON, MCS_INACTIVE_FILE, @@ -2181,6 +2698,7 @@ struct { {"mapped_file", "total_mapped_file"}, {"pgpgin", "total_pgpgin"}, {"pgpgout", "total_pgpgout"}, + {"swap", "total_swap"}, {"inactive_anon", "total_inactive_anon"}, {"active_anon", "total_active_anon"}, {"inactive_file", "total_inactive_file"}, @@ -2205,6 +2723,10 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) s->stat[MCS_PGPGIN] += val; val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); s->stat[MCS_PGPGOUT] += val; + if (do_swap_account) { + val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT); + s->stat[MCS_SWAP] += val * PAGE_SIZE; + } /* per zone stat */ val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); @@ -2236,8 +2758,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, memset(&mystat, 0, sizeof(mystat)); mem_cgroup_get_local_stat(mem_cont, &mystat); - for (i = 0; i < NR_MCS_STAT; i++) + for (i = 0; i < NR_MCS_STAT; i++) { + if (i == MCS_SWAP && !do_swap_account) + continue; cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); + } /* Hierarchical information */ { @@ -2250,9 +2775,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, memset(&mystat, 0, sizeof(mystat)); mem_cgroup_get_total_stat(mem_cont, &mystat); - for (i = 0; i < NR_MCS_STAT; i++) + for (i = 0; i < NR_MCS_STAT; i++) { + if (i == MCS_SWAP && !do_swap_account) + continue; cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); - + } #ifdef CONFIG_DEBUG_VM cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); @@ -2345,6 +2872,12 @@ static struct cftype mem_cgroup_files[] = { .read_u64 = mem_cgroup_read, }, { + .name = "soft_limit_in_bytes", + .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), + .write_string = mem_cgroup_write, + .read_u64 = mem_cgroup_read, + }, + { .name = "failcnt", .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), .trigger = mem_cgroup_reset, @@ -2438,6 +2971,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) mz = &pn->zoneinfo[zone]; for_each_lru(l) INIT_LIST_HEAD(&mz->lists[l]); + mz->usage_in_excess = 0; + mz->on_tree = false; + mz->mem = mem; } return 0; } @@ -2483,6 +3019,7 @@ static void __mem_cgroup_free(struct mem_cgroup *mem) { int node; + mem_cgroup_remove_from_trees(mem); free_css_id(&mem_cgroup_subsys, &mem->css); for_each_node_state(node, N_POSSIBLE) @@ -2531,6 +3068,31 @@ static void __init enable_swap_cgroup(void) } #endif +static int mem_cgroup_soft_limit_tree_init(void) +{ + struct mem_cgroup_tree_per_node *rtpn; + struct mem_cgroup_tree_per_zone *rtpz; + int tmp, node, zone; + + for_each_node_state(node, N_POSSIBLE) { + tmp = node; + if (!node_state(node, N_NORMAL_MEMORY)) + tmp = -1; + rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); + if (!rtpn) + return 1; + + soft_limit_tree.rb_tree_per_node[node] = rtpn; + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + rtpz = &rtpn->rb_tree_per_zone[zone]; + rtpz->rb_root = RB_ROOT; + spin_lock_init(&rtpz->lock); + } + } + return 0; +} + static struct cgroup_subsys_state * __ref mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -2545,10 +3107,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; + /* root ? */ if (cont->parent == NULL) { enable_swap_cgroup(); parent = NULL; + root_mem_cgroup = mem; + if (mem_cgroup_soft_limit_tree_init()) + goto free_out; + } else { parent = mem_cgroup_from_cont(cont->parent); mem->use_hierarchy = parent->use_hierarchy; @@ -2577,6 +3144,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) return &mem->css; free_out: __mem_cgroup_free(mem); + root_mem_cgroup = NULL; return ERR_PTR(error); } @@ -2612,7 +3180,8 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss, static void mem_cgroup_move_task(struct cgroup_subsys *ss, struct cgroup *cont, struct cgroup *old_cont, - struct task_struct *p) + struct task_struct *p, + bool threadgroup) { mutex_lock(&memcg_tasklist); /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c new file mode 100644 index 00000000000..729d4b15b64 --- /dev/null +++ b/mm/memory-failure.c @@ -0,0 +1,832 @@ +/* + * Copyright (C) 2008, 2009 Intel Corporation + * Authors: Andi Kleen, Fengguang Wu + * + * This software may be redistributed and/or modified under the terms of + * the GNU General Public License ("GPL") version 2 only as published by the + * Free Software Foundation. + * + * High level machine check handler. Handles pages reported by the + * hardware as being corrupted usually due to a 2bit ECC memory or cache + * failure. + * + * Handles page cache pages in various states. The tricky part + * here is that we can access any page asynchronous to other VM + * users, because memory failures could happen anytime and anywhere, + * possibly violating some of their assumptions. This is why this code + * has to be extremely careful. Generally it tries to use normal locking + * rules, as in get the standard locks, even if that means the + * error handling takes potentially a long time. + * + * The operation to map back from RMAP chains to processes has to walk + * the complete process list and has non linear complexity with the number + * mappings. In short it can be quite slow. But since memory corruptions + * are rare we hope to get away with this. + */ + +/* + * Notebook: + * - hugetlb needs more code + * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages + * - pass bad pages to kdump next kernel + */ +#define DEBUG 1 /* remove me in 2.6.34 */ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/page-flags.h> +#include <linux/sched.h> +#include <linux/rmap.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/backing-dev.h> +#include "internal.h" + +int sysctl_memory_failure_early_kill __read_mostly = 0; + +int sysctl_memory_failure_recovery __read_mostly = 1; + +atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); + +/* + * Send all the processes who have the page mapped an ``action optional'' + * signal. + */ +static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, + unsigned long pfn) +{ + struct siginfo si; + int ret; + + printk(KERN_ERR + "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", + pfn, t->comm, t->pid); + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_MCEERR_AO; + si.si_addr = (void *)addr; +#ifdef __ARCH_SI_TRAPNO + si.si_trapno = trapno; +#endif + si.si_addr_lsb = PAGE_SHIFT; + /* + * Don't use force here, it's convenient if the signal + * can be temporarily blocked. + * This could cause a loop when the user sets SIGBUS + * to SIG_IGN, but hopefully noone will do that? + */ + ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ + if (ret < 0) + printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", + t->comm, t->pid, ret); + return ret; +} + +/* + * Kill all processes that have a poisoned page mapped and then isolate + * the page. + * + * General strategy: + * Find all processes having the page mapped and kill them. + * But we keep a page reference around so that the page is not + * actually freed yet. + * Then stash the page away + * + * There's no convenient way to get back to mapped processes + * from the VMAs. So do a brute-force search over all + * running processes. + * + * Remember that machine checks are not common (or rather + * if they are common you have other problems), so this shouldn't + * be a performance issue. + * + * Also there are some races possible while we get from the + * error detection to actually handle it. + */ + +struct to_kill { + struct list_head nd; + struct task_struct *tsk; + unsigned long addr; + unsigned addr_valid:1; +}; + +/* + * Failure handling: if we can't find or can't kill a process there's + * not much we can do. We just print a message and ignore otherwise. + */ + +/* + * Schedule a process for later kill. + * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM. + * TBD would GFP_NOIO be enough? + */ +static void add_to_kill(struct task_struct *tsk, struct page *p, + struct vm_area_struct *vma, + struct list_head *to_kill, + struct to_kill **tkc) +{ + struct to_kill *tk; + + if (*tkc) { + tk = *tkc; + *tkc = NULL; + } else { + tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); + if (!tk) { + printk(KERN_ERR + "MCE: Out of memory while machine check handling\n"); + return; + } + } + tk->addr = page_address_in_vma(p, vma); + tk->addr_valid = 1; + + /* + * In theory we don't have to kill when the page was + * munmaped. But it could be also a mremap. Since that's + * likely very rare kill anyways just out of paranoia, but use + * a SIGKILL because the error is not contained anymore. + */ + if (tk->addr == -EFAULT) { + pr_debug("MCE: Unable to find user space address %lx in %s\n", + page_to_pfn(p), tsk->comm); + tk->addr_valid = 0; + } + get_task_struct(tsk); + tk->tsk = tsk; + list_add_tail(&tk->nd, to_kill); +} + +/* + * Kill the processes that have been collected earlier. + * + * Only do anything when DOIT is set, otherwise just free the list + * (this is used for clean pages which do not need killing) + * Also when FAIL is set do a force kill because something went + * wrong earlier. + */ +static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, + int fail, unsigned long pfn) +{ + struct to_kill *tk, *next; + + list_for_each_entry_safe (tk, next, to_kill, nd) { + if (doit) { + /* + * In case something went wrong with munmaping + * make sure the process doesn't catch the + * signal and then access the memory. Just kill it. + * the signal handlers + */ + if (fail || tk->addr_valid == 0) { + printk(KERN_ERR + "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", + pfn, tk->tsk->comm, tk->tsk->pid); + force_sig(SIGKILL, tk->tsk); + } + + /* + * In theory the process could have mapped + * something else on the address in-between. We could + * check for that, but we need to tell the + * process anyways. + */ + else if (kill_proc_ao(tk->tsk, tk->addr, trapno, + pfn) < 0) + printk(KERN_ERR + "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", + pfn, tk->tsk->comm, tk->tsk->pid); + } + put_task_struct(tk->tsk); + kfree(tk); + } +} + +static int task_early_kill(struct task_struct *tsk) +{ + if (!tsk->mm) + return 0; + if (tsk->flags & PF_MCE_PROCESS) + return !!(tsk->flags & PF_MCE_EARLY); + return sysctl_memory_failure_early_kill; +} + +/* + * Collect processes when the error hit an anonymous page. + */ +static void collect_procs_anon(struct page *page, struct list_head *to_kill, + struct to_kill **tkc) +{ + struct vm_area_struct *vma; + struct task_struct *tsk; + struct anon_vma *av; + + read_lock(&tasklist_lock); + av = page_lock_anon_vma(page); + if (av == NULL) /* Not actually mapped anymore */ + goto out; + for_each_process (tsk) { + if (!task_early_kill(tsk)) + continue; + list_for_each_entry (vma, &av->head, anon_vma_node) { + if (!page_mapped_in_vma(page, vma)) + continue; + if (vma->vm_mm == tsk->mm) + add_to_kill(tsk, page, vma, to_kill, tkc); + } + } + page_unlock_anon_vma(av); +out: + read_unlock(&tasklist_lock); +} + +/* + * Collect processes when the error hit a file mapped page. + */ +static void collect_procs_file(struct page *page, struct list_head *to_kill, + struct to_kill **tkc) +{ + struct vm_area_struct *vma; + struct task_struct *tsk; + struct prio_tree_iter iter; + struct address_space *mapping = page->mapping; + + /* + * A note on the locking order between the two locks. + * We don't rely on this particular order. + * If you have some other code that needs a different order + * feel free to switch them around. Or add a reverse link + * from mm_struct to task_struct, then this could be all + * done without taking tasklist_lock and looping over all tasks. + */ + + read_lock(&tasklist_lock); + spin_lock(&mapping->i_mmap_lock); + for_each_process(tsk) { + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + + if (!task_early_kill(tsk)) + continue; + + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, + pgoff) { + /* + * Send early kill signal to tasks where a vma covers + * the page but the corrupted page is not necessarily + * mapped it in its pte. + * Assume applications who requested early kill want + * to be informed of all such data corruptions. + */ + if (vma->vm_mm == tsk->mm) + add_to_kill(tsk, page, vma, to_kill, tkc); + } + } + spin_unlock(&mapping->i_mmap_lock); + read_unlock(&tasklist_lock); +} + +/* + * Collect the processes who have the corrupted page mapped to kill. + * This is done in two steps for locking reasons. + * First preallocate one tokill structure outside the spin locks, + * so that we can kill at least one process reasonably reliable. + */ +static void collect_procs(struct page *page, struct list_head *tokill) +{ + struct to_kill *tk; + + if (!page->mapping) + return; + + tk = kmalloc(sizeof(struct to_kill), GFP_NOIO); + if (!tk) + return; + if (PageAnon(page)) + collect_procs_anon(page, tokill, &tk); + else + collect_procs_file(page, tokill, &tk); + kfree(tk); +} + +/* + * Error handlers for various types of pages. + */ + +enum outcome { + FAILED, /* Error handling failed */ + DELAYED, /* Will be handled later */ + IGNORED, /* Error safely ignored */ + RECOVERED, /* Successfully recovered */ +}; + +static const char *action_name[] = { + [FAILED] = "Failed", + [DELAYED] = "Delayed", + [IGNORED] = "Ignored", + [RECOVERED] = "Recovered", +}; + +/* + * Error hit kernel page. + * Do nothing, try to be lucky and not touch this instead. For a few cases we + * could be more sophisticated. + */ +static int me_kernel(struct page *p, unsigned long pfn) +{ + return DELAYED; +} + +/* + * Already poisoned page. + */ +static int me_ignore(struct page *p, unsigned long pfn) +{ + return IGNORED; +} + +/* + * Page in unknown state. Do nothing. + */ +static int me_unknown(struct page *p, unsigned long pfn) +{ + printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn); + return FAILED; +} + +/* + * Free memory + */ +static int me_free(struct page *p, unsigned long pfn) +{ + return DELAYED; +} + +/* + * Clean (or cleaned) page cache page. + */ +static int me_pagecache_clean(struct page *p, unsigned long pfn) +{ + int err; + int ret = FAILED; + struct address_space *mapping; + + if (!isolate_lru_page(p)) + page_cache_release(p); + + /* + * For anonymous pages we're done the only reference left + * should be the one m_f() holds. + */ + if (PageAnon(p)) + return RECOVERED; + + /* + * Now truncate the page in the page cache. This is really + * more like a "temporary hole punch" + * Don't do this for block devices when someone else + * has a reference, because it could be file system metadata + * and that's not safe to truncate. + */ + mapping = page_mapping(p); + if (!mapping) { + /* + * Page has been teared down in the meanwhile + */ + return FAILED; + } + + /* + * Truncation is a bit tricky. Enable it per file system for now. + * + * Open: to take i_mutex or not for this? Right now we don't. + */ + if (mapping->a_ops->error_remove_page) { + err = mapping->a_ops->error_remove_page(mapping, p); + if (err != 0) { + printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n", + pfn, err); + } else if (page_has_private(p) && + !try_to_release_page(p, GFP_NOIO)) { + pr_debug("MCE %#lx: failed to release buffers\n", pfn); + } else { + ret = RECOVERED; + } + } else { + /* + * If the file system doesn't support it just invalidate + * This fails on dirty or anything with private pages + */ + if (invalidate_inode_page(p)) + ret = RECOVERED; + else + printk(KERN_INFO "MCE %#lx: Failed to invalidate\n", + pfn); + } + return ret; +} + +/* + * Dirty cache page page + * Issues: when the error hit a hole page the error is not properly + * propagated. + */ +static int me_pagecache_dirty(struct page *p, unsigned long pfn) +{ + struct address_space *mapping = page_mapping(p); + + SetPageError(p); + /* TBD: print more information about the file. */ + if (mapping) { + /* + * IO error will be reported by write(), fsync(), etc. + * who check the mapping. + * This way the application knows that something went + * wrong with its dirty file data. + * + * There's one open issue: + * + * The EIO will be only reported on the next IO + * operation and then cleared through the IO map. + * Normally Linux has two mechanisms to pass IO error + * first through the AS_EIO flag in the address space + * and then through the PageError flag in the page. + * Since we drop pages on memory failure handling the + * only mechanism open to use is through AS_AIO. + * + * This has the disadvantage that it gets cleared on + * the first operation that returns an error, while + * the PageError bit is more sticky and only cleared + * when the page is reread or dropped. If an + * application assumes it will always get error on + * fsync, but does other operations on the fd before + * and the page is dropped inbetween then the error + * will not be properly reported. + * + * This can already happen even without hwpoisoned + * pages: first on metadata IO errors (which only + * report through AS_EIO) or when the page is dropped + * at the wrong time. + * + * So right now we assume that the application DTRT on + * the first EIO, but we're not worse than other parts + * of the kernel. + */ + mapping_set_error(mapping, EIO); + } + + return me_pagecache_clean(p, pfn); +} + +/* + * Clean and dirty swap cache. + * + * Dirty swap cache page is tricky to handle. The page could live both in page + * cache and swap cache(ie. page is freshly swapped in). So it could be + * referenced concurrently by 2 types of PTEs: + * normal PTEs and swap PTEs. We try to handle them consistently by calling + * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs, + * and then + * - clear dirty bit to prevent IO + * - remove from LRU + * - but keep in the swap cache, so that when we return to it on + * a later page fault, we know the application is accessing + * corrupted data and shall be killed (we installed simple + * interception code in do_swap_page to catch it). + * + * Clean swap cache pages can be directly isolated. A later page fault will + * bring in the known good data from disk. + */ +static int me_swapcache_dirty(struct page *p, unsigned long pfn) +{ + int ret = FAILED; + + ClearPageDirty(p); + /* Trigger EIO in shmem: */ + ClearPageUptodate(p); + + if (!isolate_lru_page(p)) { + page_cache_release(p); + ret = DELAYED; + } + + return ret; +} + +static int me_swapcache_clean(struct page *p, unsigned long pfn) +{ + int ret = FAILED; + + if (!isolate_lru_page(p)) { + page_cache_release(p); + ret = RECOVERED; + } + delete_from_swap_cache(p); + return ret; +} + +/* + * Huge pages. Needs work. + * Issues: + * No rmap support so we cannot find the original mapper. In theory could walk + * all MMs and look for the mappings, but that would be non atomic and racy. + * Need rmap for hugepages for this. Alternatively we could employ a heuristic, + * like just walking the current process and hoping it has it mapped (that + * should be usually true for the common "shared database cache" case) + * Should handle free huge pages and dequeue them too, but this needs to + * handle huge page accounting correctly. + */ +static int me_huge_page(struct page *p, unsigned long pfn) +{ + return FAILED; +} + +/* + * Various page states we can handle. + * + * A page state is defined by its current page->flags bits. + * The table matches them in order and calls the right handler. + * + * This is quite tricky because we can access page at any time + * in its live cycle, so all accesses have to be extremly careful. + * + * This is not complete. More states could be added. + * For any missing state don't attempt recovery. + */ + +#define dirty (1UL << PG_dirty) +#define sc (1UL << PG_swapcache) +#define unevict (1UL << PG_unevictable) +#define mlock (1UL << PG_mlocked) +#define writeback (1UL << PG_writeback) +#define lru (1UL << PG_lru) +#define swapbacked (1UL << PG_swapbacked) +#define head (1UL << PG_head) +#define tail (1UL << PG_tail) +#define compound (1UL << PG_compound) +#define slab (1UL << PG_slab) +#define buddy (1UL << PG_buddy) +#define reserved (1UL << PG_reserved) + +static struct page_state { + unsigned long mask; + unsigned long res; + char *msg; + int (*action)(struct page *p, unsigned long pfn); +} error_states[] = { + { reserved, reserved, "reserved kernel", me_ignore }, + { buddy, buddy, "free kernel", me_free }, + + /* + * Could in theory check if slab page is free or if we can drop + * currently unused objects without touching them. But just + * treat it as standard kernel for now. + */ + { slab, slab, "kernel slab", me_kernel }, + +#ifdef CONFIG_PAGEFLAGS_EXTENDED + { head, head, "huge", me_huge_page }, + { tail, tail, "huge", me_huge_page }, +#else + { compound, compound, "huge", me_huge_page }, +#endif + + { sc|dirty, sc|dirty, "swapcache", me_swapcache_dirty }, + { sc|dirty, sc, "swapcache", me_swapcache_clean }, + + { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty}, + { unevict, unevict, "unevictable LRU", me_pagecache_clean}, + +#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT + { mlock|dirty, mlock|dirty, "mlocked LRU", me_pagecache_dirty }, + { mlock, mlock, "mlocked LRU", me_pagecache_clean }, +#endif + + { lru|dirty, lru|dirty, "LRU", me_pagecache_dirty }, + { lru|dirty, lru, "clean LRU", me_pagecache_clean }, + { swapbacked, swapbacked, "anonymous", me_pagecache_clean }, + + /* + * Catchall entry: must be at end. + */ + { 0, 0, "unknown page state", me_unknown }, +}; + +#undef lru + +static void action_result(unsigned long pfn, char *msg, int result) +{ + struct page *page = NULL; + if (pfn_valid(pfn)) + page = pfn_to_page(pfn); + + printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n", + pfn, + page && PageDirty(page) ? "dirty " : "", + msg, action_name[result]); +} + +static int page_action(struct page_state *ps, struct page *p, + unsigned long pfn, int ref) +{ + int result; + + result = ps->action(p, pfn); + action_result(pfn, ps->msg, result); + if (page_count(p) != 1 + ref) + printk(KERN_ERR + "MCE %#lx: %s page still referenced by %d users\n", + pfn, ps->msg, page_count(p) - 1); + + /* Could do more checks here if page looks ok */ + /* + * Could adjust zone counters here to correct for the missing page. + */ + + return result == RECOVERED ? 0 : -EBUSY; +} + +#define N_UNMAP_TRIES 5 + +/* + * Do all that is necessary to remove user space mappings. Unmap + * the pages and send SIGBUS to the processes if the data was dirty. + */ +static void hwpoison_user_mappings(struct page *p, unsigned long pfn, + int trapno) +{ + enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; + struct address_space *mapping; + LIST_HEAD(tokill); + int ret; + int i; + int kill = 1; + + if (PageReserved(p) || PageCompound(p) || PageSlab(p)) + return; + + if (!PageLRU(p)) + lru_add_drain_all(); + + /* + * This check implies we don't kill processes if their pages + * are in the swap cache early. Those are always late kills. + */ + if (!page_mapped(p)) + return; + + if (PageSwapCache(p)) { + printk(KERN_ERR + "MCE %#lx: keeping poisoned page in swap cache\n", pfn); + ttu |= TTU_IGNORE_HWPOISON; + } + + /* + * Propagate the dirty bit from PTEs to struct page first, because we + * need this to decide if we should kill or just drop the page. + */ + mapping = page_mapping(p); + if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) { + if (page_mkclean(p)) { + SetPageDirty(p); + } else { + kill = 0; + ttu |= TTU_IGNORE_HWPOISON; + printk(KERN_INFO + "MCE %#lx: corrupted page was clean: dropped without side effects\n", + pfn); + } + } + + /* + * First collect all the processes that have the page + * mapped in dirty form. This has to be done before try_to_unmap, + * because ttu takes the rmap data structures down. + * + * Error handling: We ignore errors here because + * there's nothing that can be done. + */ + if (kill) + collect_procs(p, &tokill); + + /* + * try_to_unmap can fail temporarily due to races. + * Try a few times (RED-PEN better strategy?) + */ + for (i = 0; i < N_UNMAP_TRIES; i++) { + ret = try_to_unmap(p, ttu); + if (ret == SWAP_SUCCESS) + break; + pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret); + } + + if (ret != SWAP_SUCCESS) + printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", + pfn, page_mapcount(p)); + + /* + * Now that the dirty bit has been propagated to the + * struct page and all unmaps done we can decide if + * killing is needed or not. Only kill when the page + * was dirty, otherwise the tokill list is merely + * freed. When there was a problem unmapping earlier + * use a more force-full uncatchable kill to prevent + * any accesses to the poisoned memory. + */ + kill_procs_ao(&tokill, !!PageDirty(p), trapno, + ret != SWAP_SUCCESS, pfn); +} + +int __memory_failure(unsigned long pfn, int trapno, int ref) +{ + struct page_state *ps; + struct page *p; + int res; + + if (!sysctl_memory_failure_recovery) + panic("Memory failure from trap %d on page %lx", trapno, pfn); + + if (!pfn_valid(pfn)) { + action_result(pfn, "memory outside kernel control", IGNORED); + return -EIO; + } + + p = pfn_to_page(pfn); + if (TestSetPageHWPoison(p)) { + action_result(pfn, "already hardware poisoned", IGNORED); + return 0; + } + + atomic_long_add(1, &mce_bad_pages); + + /* + * We need/can do nothing about count=0 pages. + * 1) it's a free page, and therefore in safe hand: + * prep_new_page() will be the gate keeper. + * 2) it's part of a non-compound high order page. + * Implies some kernel user: cannot stop them from + * R/W the page; let's pray that the page has been + * used and will be freed some time later. + * In fact it's dangerous to directly bump up page count from 0, + * that may make page_freeze_refs()/page_unfreeze_refs() mismatch. + */ + if (!get_page_unless_zero(compound_head(p))) { + action_result(pfn, "free or high order kernel", IGNORED); + return PageBuddy(compound_head(p)) ? 0 : -EBUSY; + } + + /* + * Lock the page and wait for writeback to finish. + * It's very difficult to mess with pages currently under IO + * and in many cases impossible, so we just avoid it here. + */ + lock_page_nosync(p); + wait_on_page_writeback(p); + + /* + * Now take care of user space mappings. + */ + hwpoison_user_mappings(p, pfn, trapno); + + /* + * Torn down by someone else? + */ + if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { + action_result(pfn, "already truncated LRU", IGNORED); + res = 0; + goto out; + } + + res = -EBUSY; + for (ps = error_states;; ps++) { + if ((p->flags & ps->mask) == ps->res) { + res = page_action(ps, p, pfn, ref); + break; + } + } +out: + unlock_page(p); + return res; +} +EXPORT_SYMBOL_GPL(__memory_failure); + +/** + * memory_failure - Handle memory failure of a page. + * @pfn: Page Number of the corrupted page + * @trapno: Trap number reported in the signal to user space. + * + * This function is called by the low level machine check code + * of an architecture when it detects hardware memory corruption + * of a page. It tries its best to recover, which includes + * dropping pages, killing processes etc. + * + * The function is primarily of use for corruptions that + * happen outside the current execution context (e.g. when + * detected by a background scrubber) + * + * Must run in process context (e.g. a work queue) with interrupts + * enabled and no spinlocks hold. + */ +void memory_failure(unsigned long pfn, int trapno) +{ + __memory_failure(pfn, trapno, 0); +} diff --git a/mm/memory.c b/mm/memory.c index b1443ac07c0..7e91b5f9f69 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr = vma->vm_start; /* - * Hide vma from rmap and vmtruncate before freeing pgtables + * Hide vma from rmap and truncate_pagecache before freeing + * pgtables */ anon_vma_unlink(vma); unlink_file_vma(vma); @@ -1325,7 +1326,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) return i ? i : -ENOMEM; - else if (ret & VM_FAULT_SIGBUS) + if (ret & + (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) return i ? i : -EFAULT; BUG(); } @@ -2407,7 +2409,7 @@ restart: * @mapping: the address space containing mmaps to be unmapped. * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE - * boundary. Note that this is different from vmtruncate(), which + * boundary. Note that this is different from truncate_pagecache(), which * must keep the partial page. In contrast, we must get rid of * partial pages. * @holelen: size of prospective hole in bytes. This will be rounded @@ -2458,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/** - * vmtruncate - unmap mappings "freed" by truncate() syscall - * @inode: inode of the file used - * @offset: file offset to start truncating - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode * inode, loff_t offset) -{ - if (inode->i_size < offset) { - unsigned long limit; - - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out_big; - i_size_write(inode, offset); - } else { - struct address_space *mapping = inode->i_mapping; - - /* - * truncation of in-use swapfiles is disallowed - it would - * cause subsequent swapout to scribble on the now-freed - * blocks. - */ - if (IS_SWAPFILE(inode)) - return -ETXTBSY; - i_size_write(inode, offset); - - /* - * unmap_mapping_range is called twice, first simply for - * efficiency so that truncate_inode_pages does fewer - * single-page unmaps. However after this first call, and - * before truncate_inode_pages finishes, it is possible for - * private pages to be COWed, which remain after - * truncate_inode_pages finishes, hence the second - * unmap_mapping_range call must be made for correctness. - */ - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(mapping, offset); - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); - } - - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; - -out_sig: - send_sig(SIGXFSZ, current, 0); -out_big: - return -EFBIG; -} -EXPORT_SYMBOL(vmtruncate); - int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) { struct address_space *mapping = inode->i_mapping; @@ -2559,8 +2504,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out; entry = pte_to_swp_entry(orig_pte); - if (is_migration_entry(entry)) { - migration_entry_wait(mm, pmd, address); + if (unlikely(non_swap_entry(entry))) { + if (is_migration_entry(entry)) { + migration_entry_wait(mm, pmd, address); + } else if (is_hwpoison_entry(entry)) { + ret = VM_FAULT_HWPOISON; + } else { + print_bad_pte(vma, address, orig_pte, NULL); + ret = VM_FAULT_OOM; + } goto out; } delayacct_set_flag(DELAYACCT_PF_SWAPIN); @@ -2584,6 +2536,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Had to read the page from swap area: Major fault */ ret = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); + } else if (PageHWPoison(page)) { + ret = VM_FAULT_HWPOISON; + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); + goto out; } lock_page(page); @@ -2760,6 +2716,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) return ret; + if (unlikely(PageHWPoison(vmf.page))) { + if (ret & VM_FAULT_LOCKED) + unlock_page(vmf.page); + return VM_FAULT_HWPOISON; + } + /* * For consistency in subsequent calls, make the faulted page always * locked. diff --git a/mm/migrate.c b/mm/migrate.c index 16052e80aaa..1a4bf481378 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, } /* Establish migration ptes or remove ptes */ - try_to_unmap(page, 1); + try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); skip_unmap: if (!page_mapped(page)) diff --git a/mm/mremap.c b/mm/mremap.c index 20a07dba6be..97bff254771 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (vma->vm_file) { /* * Subtle point from Rajesh Venkatasubramanian: before - * moving file-based ptes, we must lock vmtruncate out, - * since it might clean the dst vma before the src vma, + * moving file-based ptes, we must lock truncate_pagecache + * out, since it might clean the dst vma before the src vma, * and we propagate stale pages into the dst afterward. */ mapping = vma->vm_file->f_mapping; diff --git a/mm/nommu.c b/mm/nommu.c index 8d484241d03..56a446f0597 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -83,46 +83,6 @@ struct vm_operations_struct generic_file_vm_ops = { }; /* - * Handle all mappings that got truncated by a "truncate()" - * system call. - * - * NOTE! We have to be ready to update the memory sharing - * between the file and the memory map for a potential last - * incomplete page. Ugly, but necessary. - */ -int vmtruncate(struct inode *inode, loff_t offset) -{ - struct address_space *mapping = inode->i_mapping; - unsigned long limit; - - if (inode->i_size < offset) - goto do_expand; - i_size_write(inode, offset); - - truncate_inode_pages(mapping, offset); - goto out_truncate; - -do_expand: - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit != RLIM_INFINITY && offset > limit) - goto out_sig; - if (offset > inode->i_sb->s_maxbytes) - goto out; - i_size_write(inode, offset); - -out_truncate: - if (inode->i_op->truncate) - inode->i_op->truncate(inode); - return 0; -out_sig: - send_sig(SIGXFSZ, current, 0); -out: - return -EFBIG; -} - -EXPORT_SYMBOL(vmtruncate); - -/* * Return the total memory allocated for this pointer, not * just what the caller asked for. * diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5f378dd5880..d99664e8607 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -155,37 +155,37 @@ static void update_completion_period(void) } int dirty_background_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_bytes = 0; return ret; } int dirty_background_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write) dirty_background_ratio = 0; return ret; } int dirty_ratio_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret; - ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_ratio != old_ratio) { update_completion_period(); vm_dirty_bytes = 0; @@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write, int dirty_bytes_handler(struct ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { update_completion_period(); vm_dirty_ratio = 0; @@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ int dirty_writeback_centisecs_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); return 0; } @@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) EXPORT_SYMBOL(redirty_page_for_writepage); /* + * Dirty a page. + * + * For pages with a mapping this should be done under the page lock + * for the benefit of asynchronous memory errors who prefer a consistent + * dirty state. This rule can be broken in some special cases, + * but should be better not to. + * * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5717f27a070..bf720550b44 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -234,6 +234,12 @@ static void bad_page(struct page *page) static unsigned long nr_shown; static unsigned long nr_unshown; + /* Don't complain about poisoned pages */ + if (PageHWPoison(page)) { + __ClearPageBuddy(page); + return; + } + /* * Allow a burst of 60 reports, then keep quiet for that minute; * or allow a steady drip of one report per second. @@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page, /* * This page is about to be returned from the page allocator */ -static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +static inline int check_new_page(struct page *page) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | @@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) bad_page(page); return 1; } + return 0; +} + +static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +{ + int i; + + for (i = 0; i < (1 << order); i++) { + struct page *p = page + i; + if (unlikely(check_new_page(p))) + return 1; + } set_page_private(page, 0); set_page_refcounted(page); @@ -2373,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order); * sysctl handler for numa_zonelist_order */ int numa_zonelist_order_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, + void __user *buffer, size_t *length, loff_t *ppos) { char saved_string[NUMA_ZONELIST_ORDER_LEN]; @@ -2382,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write, if (write) strncpy(saved_string, (char*)table->data, NUMA_ZONELIST_ORDER_LEN); - ret = proc_dostring(table, write, file, buffer, length, ppos); + ret = proc_dostring(table, write, buffer, length, ppos); if (ret) return ret; if (write) { @@ -4706,9 +4724,9 @@ module_init(init_per_zone_wmark_min) * changes. */ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, file, buffer, length, ppos); + proc_dointvec(table, write, buffer, length, ppos); if (write) setup_per_zone_wmarks(); return 0; @@ -4716,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, #ifdef CONFIG_NUMA int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); if (rc) return rc; @@ -4732,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, } int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int rc; - rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); if (rc) return rc; @@ -4758,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, * if in function of the boot time zone sizes. */ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_minmax(table, write, file, buffer, length, ppos); + proc_dointvec_minmax(table, write, buffer, length, ppos); setup_per_zone_lowmem_reserve(); return 0; } @@ -4772,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, */ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, - struct file *file, void __user *buffer, size_t *length, loff_t *ppos) + void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; unsigned int cpu; int ret; - ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); if (!write || (ret == -EINVAL)) return ret; for_each_populated_zone(zone) { diff --git a/mm/quicklist.c b/mm/quicklist.c index 6eedf7e473d..6633965bb27 100644 --- a/mm/quicklist.c +++ b/mm/quicklist.c @@ -29,7 +29,6 @@ static unsigned long max_pages(unsigned long min_pages) int node = numa_node_id(); struct zone *zones = NODE_DATA(node)->node_zones; int num_cpus_on_node; - const struct cpumask *cpumask_on_node = cpumask_of_node(node); node_free_pages = #ifdef CONFIG_ZONE_DMA @@ -42,7 +41,7 @@ static unsigned long max_pages(unsigned long min_pages) max = node_free_pages / FRACTION_OF_NODE_MEM; - num_cpus_on_node = cpus_weight_nr(*cpumask_on_node); + num_cpus_on_node = cpumask_weight(cpumask_of_node(node)); max /= num_cpus_on_node; return max(max, min_pages); diff --git a/mm/rmap.c b/mm/rmap.c index 720fc03a7bc..28aafe2b530 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -36,6 +36,11 @@ * mapping->tree_lock (widely used, in set_page_dirty, * in arch-dependent flush_dcache_mmap_lock, * within inode_lock in __sync_single_inode) + * + * (code doesn't rely on that order so it could be switched around) + * ->tasklist_lock + * anon_vma->lock (memory_failure, collect_procs_anon) + * pte map lock */ #include <linux/mm.h> @@ -191,7 +196,7 @@ void __init anon_vma_init(void) * Getting a lock on a stable anon_vma from a page off the LRU is * tricky: page_lock_anon_vma rely on RCU to guard against the races. */ -static struct anon_vma *page_lock_anon_vma(struct page *page) +struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma; unsigned long anon_mapping; @@ -211,7 +216,7 @@ out: return NULL; } -static void page_unlock_anon_vma(struct anon_vma *anon_vma) +void page_unlock_anon_vma(struct anon_vma *anon_vma) { spin_unlock(&anon_vma->lock); rcu_read_unlock(); @@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, * if the page is not mapped into the page tables of this VMA. Only * valid for normal file or anonymous VMAs. */ -static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) +int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) { unsigned long address; pte_t *pte; @@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page) * repeatedly from either try_to_unmap_anon or try_to_unmap_file. */ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, - int migration) + enum ttu_flags flags) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * If it's recently referenced (perhaps page_referenced * skipped over this mm) then we should reactivate it. */ - if (!migration) { + if (!(flags & TTU_IGNORE_MLOCK)) { if (vma->vm_flags & VM_LOCKED) { ret = SWAP_MLOCK; goto out_unmap; } + } + if (!(flags & TTU_IGNORE_ACCESS)) { if (ptep_clear_flush_young_notify(vma, address, pte)) { ret = SWAP_FAIL; goto out_unmap; @@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* Update high watermark before we lower rss */ update_hiwater_rss(mm); - if (PageAnon(page)) { + if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { + if (PageAnon(page)) + dec_mm_counter(mm, anon_rss); + else + dec_mm_counter(mm, file_rss); + set_pte_at(mm, address, pte, + swp_entry_to_pte(make_hwpoison_entry(page))); + } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(page) }; if (PageSwapCache(page)) { @@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * pte. do_swap_page() will wait until the migration * pte is removed and then restart fault handling. */ - BUG_ON(!migration); + BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); entry = make_migration_entry(page, pte_write(pteval)); } set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); BUG_ON(pte_file(*pte)); - } else if (PAGE_MIGRATION && migration) { + } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) { /* Establish migration entry for a file page */ swp_entry_t entry; entry = make_migration_entry(page, pte_write(pteval)); @@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma) * vm_flags for that VMA. That should be OK, because that vma shouldn't be * 'LOCKED. */ -static int try_to_unmap_anon(struct page *page, int unlock, int migration) +static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) { struct anon_vma *anon_vma; struct vm_area_struct *vma; unsigned int mlocked = 0; int ret = SWAP_AGAIN; + int unlock = TTU_ACTION(flags) == TTU_MUNLOCK; if (MLOCK_PAGES && unlikely(unlock)) ret = SWAP_SUCCESS; /* default for try_to_munlock() */ @@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) continue; /* must visit all unlocked vmas */ ret = SWAP_MLOCK; /* saw at least one mlocked vma */ } else { - ret = try_to_unmap_one(page, vma, migration); + ret = try_to_unmap_one(page, vma, flags); if (ret == SWAP_FAIL || !page_mapped(page)) break; } @@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) /** * try_to_unmap_file - unmap/unlock file page using the object-based rmap method * @page: the page to unmap/unlock - * @unlock: request for unlock rather than unmap [unlikely] - * @migration: unmapping for migration - ignored if @unlock + * @flags: action and flags * * Find all the mappings of a page using the mapping pointer and the vma chains * contained in the address_space struct it points to. @@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) * vm_flags for that VMA. That should be OK, because that vma shouldn't be * 'LOCKED. */ -static int try_to_unmap_file(struct page *page, int unlock, int migration) +static int try_to_unmap_file(struct page *page, enum ttu_flags flags) { struct address_space *mapping = page->mapping; pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); @@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) unsigned long max_nl_size = 0; unsigned int mapcount; unsigned int mlocked = 0; + int unlock = TTU_ACTION(flags) == TTU_MUNLOCK; if (MLOCK_PAGES && unlikely(unlock)) ret = SWAP_SUCCESS; /* default for try_to_munlock() */ @@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) continue; /* must visit all vmas */ ret = SWAP_MLOCK; } else { - ret = try_to_unmap_one(page, vma, migration); + ret = try_to_unmap_one(page, vma, flags); if (ret == SWAP_FAIL || !page_mapped(page)) goto out; } @@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ret = SWAP_MLOCK; /* leave mlocked == 0 */ goto out; /* no need to look further */ } - if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED)) + if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) && + (vma->vm_flags & VM_LOCKED)) continue; cursor = (unsigned long) vma->vm_private_data; if (cursor > max_nl_cursor) @@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) do { list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) { - if (!MLOCK_PAGES && !migration && + if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) && (vma->vm_flags & VM_LOCKED)) continue; cursor = (unsigned long) vma->vm_private_data; @@ -1177,7 +1193,7 @@ out: /** * try_to_unmap - try to remove all page table mappings to a page * @page: the page to get unmapped - * @migration: migration flag + * @flags: action and flags * * Tries to remove all the page table entries which are mapping this * page, used in the pageout path. Caller must hold the page lock. @@ -1188,16 +1204,16 @@ out: * SWAP_FAIL - the page is unswappable * SWAP_MLOCK - page is mlocked. */ -int try_to_unmap(struct page *page, int migration) +int try_to_unmap(struct page *page, enum ttu_flags flags) { int ret; BUG_ON(!PageLocked(page)); if (PageAnon(page)) - ret = try_to_unmap_anon(page, 0, migration); + ret = try_to_unmap_anon(page, flags); else - ret = try_to_unmap_file(page, 0, migration); + ret = try_to_unmap_file(page, flags); if (ret != SWAP_MLOCK && !page_mapped(page)) ret = SWAP_SUCCESS; return ret; @@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page) VM_BUG_ON(!PageLocked(page) || PageLRU(page)); if (PageAnon(page)) - return try_to_unmap_anon(page, 1, 0); + return try_to_unmap_anon(page, TTU_MUNLOCK); else - return try_to_unmap_file(page, 1, 0); + return try_to_unmap_file(page, TTU_MUNLOCK); } diff --git a/mm/shmem.c b/mm/shmem.c index b206a7a32e2..98631c26c20 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping, if (pos + copied > inode->i_size) i_size_write(inode, pos + copied); - unlock_page(page); set_page_dirty(page); + unlock_page(page); page_cache_release(page); return copied; @@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s iput(inode); return error; } - unlock_page(page); inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_symlink_inode_operations; kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len); kunmap_atomic(kaddr, KM_USER0); set_page_dirty(page); + unlock_page(page); page_cache_release(page); } if (dir->i_mode & S_ISGID) @@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = { .write_end = shmem_write_end, #endif .migratepage = migrate_page, + .error_remove_page = generic_error_remove_page, }; static const struct file_operations shmem_file_operations = { diff --git a/mm/swapfile.c b/mm/swapfile.c index f1bf19daadc..4de7f02f820 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry) struct swap_info_struct *p; struct page *page = NULL; - if (is_migration_entry(entry)) + if (non_swap_entry(entry)) return 1; p = swap_info_get(entry); @@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache) int count; bool has_cache; - if (is_migration_entry(entry)) + if (non_swap_entry(entry)) return -EINVAL; type = swp_type(entry); diff --git a/mm/truncate.c b/mm/truncate.c index ccc3ecf7cb9..450cebdabfc 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page); * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ -static void +static int truncate_complete_page(struct address_space *mapping, struct page *page) { if (page->mapping != mapping) - return; + return -EIO; if (page_has_private(page)) do_invalidatepage(page, 0); @@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) remove_from_page_cache(page); ClearPageMappedToDisk(page); page_cache_release(page); /* pagecache ref */ + return 0; } /* @@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) return ret; } +int truncate_inode_page(struct address_space *mapping, struct page *page) +{ + if (page_mapped(page)) { + unmap_mapping_range(mapping, + (loff_t)page->index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, 0); + } + return truncate_complete_page(mapping, page); +} + +/* + * Used to get rid of pages on hardware memory corruption. + */ +int generic_error_remove_page(struct address_space *mapping, struct page *page) +{ + if (!mapping) + return -EINVAL; + /* + * Only punch for normal data pages for now. + * Handling other types like directories would need more auditing. + */ + if (!S_ISREG(mapping->host->i_mode)) + return -EIO; + return truncate_inode_page(mapping, page); +} +EXPORT_SYMBOL(generic_error_remove_page); + +/* + * Safely invalidate one page from its pagecache mapping. + * It only drops clean, unused pages. The page must be locked. + * + * Returns 1 if the page is successfully invalidated, otherwise 0. + */ +int invalidate_inode_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + if (!mapping) + return 0; + if (PageDirty(page) || PageWriteback(page)) + return 0; + if (page_mapped(page)) + return 0; + return invalidate_complete_page(mapping, page); +} + /** * truncate_inode_pages - truncate range of pages specified by start & end byte offsets * @mapping: mapping to truncate @@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping, unlock_page(page); continue; } - if (page_mapped(page)) { - unmap_mapping_range(mapping, - (loff_t)page_index<<PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, 0); - } - truncate_complete_page(mapping, page); + truncate_inode_page(mapping, page); unlock_page(page); } pagevec_release(&pvec); @@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping, break; lock_page(page); wait_on_page_writeback(page); - if (page_mapped(page)) { - unmap_mapping_range(mapping, - (loff_t)page->index<<PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, 0); - } + truncate_inode_page(mapping, page); if (page->index > next) next = page->index; next++; - truncate_complete_page(mapping, page); unlock_page(page); } pagevec_release(&pvec); @@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, if (lock_failed) continue; - if (PageDirty(page) || PageWriteback(page)) - goto unlock; - if (page_mapped(page)) - goto unlock; - ret += invalidate_complete_page(mapping, page); -unlock: + ret += invalidate_inode_page(page); + unlock_page(page); if (next > end) break; @@ -465,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping) return invalidate_inode_pages2_range(mapping, 0, -1); } EXPORT_SYMBOL_GPL(invalidate_inode_pages2); + +/** + * truncate_pagecache - unmap and remove pagecache that has been truncated + * @inode: inode + * @old: old file offset + * @new: new file offset + * + * inode's new i_size must already be written before truncate_pagecache + * is called. + * + * This function should typically be called before the filesystem + * releases resources associated with the freed range (eg. deallocates + * blocks). This way, pagecache will always stay logically coherent + * with on-disk format, and the filesystem would not have to deal with + * situations such as writepage being called for a page that has already + * had its underlying blocks deallocated. + */ +void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) +{ + if (new < old) { + struct address_space *mapping = inode->i_mapping; + + /* + * unmap_mapping_range is called twice, first simply for + * efficiency so that truncate_inode_pages does fewer + * single-page unmaps. However after this first call, and + * before truncate_inode_pages finishes, it is possible for + * private pages to be COWed, which remain after + * truncate_inode_pages finishes, hence the second + * unmap_mapping_range call must be made for correctness. + */ + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(mapping, new); + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); + } +} +EXPORT_SYMBOL(truncate_pagecache); + +/** + * vmtruncate - unmap mappings "freed" by truncate() syscall + * @inode: inode of the file used + * @offset: file offset to start truncating + * + * NOTE! We have to be ready to update the memory sharing + * between the file and the memory map for a potential last + * incomplete page. Ugly, but necessary. + */ +int vmtruncate(struct inode *inode, loff_t offset) +{ + loff_t oldsize; + int error; + + error = inode_newsize_ok(inode, offset); + if (error) + return error; + oldsize = inode->i_size; + i_size_write(inode, offset); + truncate_pagecache(inode, oldsize, offset); + if (inode->i_op->truncate) + inode->i_op->truncate(inode); + + return error; +} +EXPORT_SYMBOL(vmtruncate); diff --git a/mm/vmscan.c b/mm/vmscan.c index 613e89f471d..1219ceb8a9b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, * processes. Try to unmap it here. */ if (page_mapped(page) && mapping) { - switch (try_to_unmap(page, 0)) { + switch (try_to_unmap(page, TTU_UNMAP)) { case SWAP_FAIL: goto activate_locked; case SWAP_AGAIN: @@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #ifdef CONFIG_CGROUP_MEM_RES_CTLR +unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, + gfp_t gfp_mask, bool noswap, + unsigned int swappiness, + struct zone *zone, int nid) +{ + struct scan_control sc = { + .may_writepage = !laptop_mode, + .may_unmap = 1, + .may_swap = !noswap, + .swap_cluster_max = SWAP_CLUSTER_MAX, + .swappiness = swappiness, + .order = 0, + .mem_cgroup = mem, + .isolate_pages = mem_cgroup_isolate_pages, + }; + nodemask_t nm = nodemask_of_node(nid); + + sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | + (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); + sc.nodemask = &nm; + sc.nr_reclaimed = 0; + sc.nr_scanned = 0; + /* + * NOTE: Although we can get the priority field, using it + * here is not a good idea, since it limits the pages we can scan. + * if we don't reclaim here, the shrink_zone from balance_pgdat + * will pick up pages from other mem cgroup's as well. We hack + * the priority and make it zero. + */ + shrink_zone(0, zone, &sc); + return sc.nr_reclaimed; +} + unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask, bool noswap, unsigned int swappiness) { + struct zonelist *zonelist; struct scan_control sc = { .may_writepage = !laptop_mode, .may_unmap = 1, @@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, .isolate_pages = mem_cgroup_isolate_pages, .nodemask = NULL, /* we don't care the placement */ }; - struct zonelist *zonelist; sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -1974,6 +2007,7 @@ loop_again: for (i = 0; i <= end_zone; i++) { struct zone *zone = pgdat->node_zones + i; int nr_slab; + int nid, zid; if (!populated_zone(zone)) continue; @@ -1988,6 +2022,15 @@ loop_again: temp_priority[i] = priority; sc.nr_scanned = 0; note_zone_scanning_priority(zone, priority); + + nid = pgdat->node_id; + zid = zone_idx(zone); + /* + * Call soft limit reclaim before calling shrink_zone. + * For now we ignore the return value + */ + mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask, + nid, zid); /* * We put equal pressure on every zone, unless one * zone has way too many pages free already. @@ -2801,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void) unsigned long scan_unevictable_pages; int scan_unevictable_handler(struct ctl_table *table, int write, - struct file *file, void __user *buffer, + void __user *buffer, size_t *length, loff_t *ppos) { - proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + proc_doulongvec_minmax(table, write, buffer, length, ppos); if (write && *(unsigned long *)table->data) scan_all_zones_unevictable_pages(); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index da0f64f82b5..d6b1b054e29 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1781,8 +1781,8 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) ax25_info.idletimer = ax25_display_timer(&ax25->idletimer) / (60 * HZ); ax25_info.n2count = ax25->n2count; ax25_info.state = ax25->state; - ax25_info.rcv_q = sk_wmem_alloc_get(sk); - ax25_info.snd_q = sk_rmem_alloc_get(sk); + ax25_info.rcv_q = sk_rmem_alloc_get(sk); + ax25_info.snd_q = sk_wmem_alloc_get(sk); ax25_info.vs = ax25->vs; ax25_info.vr = ax25->vr; ax25_info.va = ax25->va; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 907a82e9023..a16a2342f6b 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp, +int brnf_sysctl_call_tables(ctl_table * ctl, int write, void __user * buffer, size_t * lenp, loff_t * ppos) { int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *(int *)(ctl->data)) *(int *)(ctl->data) = 1; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0bcecbf0658..4d11c28ca8c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -192,11 +192,10 @@ #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ /* Thread control flag bits */ -#define T_TERMINATE (1<<0) -#define T_STOP (1<<1) /* Stop run */ -#define T_RUN (1<<2) /* Start run */ -#define T_REMDEVALL (1<<3) /* Remove all devs */ -#define T_REMDEV (1<<4) /* Remove one dev */ +#define T_STOP (1<<0) /* Stop run */ +#define T_RUN (1<<1) /* Start run */ +#define T_REMDEVALL (1<<2) /* Remove all devs */ +#define T_REMDEV (1<<3) /* Remove one dev */ /* If lock -- can be removed after some work */ #define if_lock(t) spin_lock(&(t->if_lock)); @@ -2105,7 +2104,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) { - ktime_t start; + ktime_t start_time, end_time; s32 remaining; struct hrtimer_sleeper t; @@ -2116,7 +2115,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) if (remaining <= 0) return; - start = ktime_now(); + start_time = ktime_now(); if (remaining < 100) udelay(remaining); /* really small just spin */ else { @@ -2135,7 +2134,10 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); } - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), start)); + end_time = ktime_now(); + + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); + pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); } static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) @@ -3365,19 +3367,29 @@ static void pktgen_rem_thread(struct pktgen_thread *t) mutex_unlock(&pktgen_thread_lock); } -static void idle(struct pktgen_dev *pkt_dev) +static void pktgen_resched(struct pktgen_dev *pkt_dev) { ktime_t idle_start = ktime_now(); + schedule(); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); +} - if (need_resched()) - schedule(); - else - cpu_relax(); +static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) +{ + ktime_t idle_start = ktime_now(); + while (atomic_read(&(pkt_dev->skb->users)) != 1) { + if (signal_pending(current)) + break; + + if (need_resched()) + pktgen_resched(pkt_dev); + else + cpu_relax(); + } pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); } - static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; @@ -3387,36 +3399,21 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) u16 queue_map; int ret; - if (pkt_dev->delay) { - spin(pkt_dev, pkt_dev->next_tx); - - /* This is max DELAY, this has special meaning of - * "never transmit" - */ - if (pkt_dev->delay == ULLONG_MAX) { - pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); - return; - } - } - - if (!pkt_dev->skb) { - set_cur_queue_map(pkt_dev); - queue_map = pkt_dev->cur_queue_map; - } else { - queue_map = skb_get_queue_mapping(pkt_dev->skb); + /* If device is offline, then don't send */ + if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) { + pktgen_stop_device(pkt_dev); + return; } - txq = netdev_get_tx_queue(odev, queue_map); - /* Did we saturate the queue already? */ - if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq)) { - /* If device is down, then all queues are permnantly frozen */ - if (netif_running(odev)) - idle(pkt_dev); - else - pktgen_stop_device(pkt_dev); + /* This is max DELAY, this has special meaning of + * "never transmit" + */ + if (unlikely(pkt_dev->delay == ULLONG_MAX)) { + pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); return; } + /* If no skb or clone count exhausted then get new one */ if (!pkt_dev->skb || (pkt_dev->last_ok && ++pkt_dev->clone_count >= pkt_dev->clone_skb)) { /* build a new pkt */ @@ -3435,54 +3432,45 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->clone_count = 0; /* reset counter */ } - /* fill_packet() might have changed the queue */ + if (pkt_dev->delay && pkt_dev->last_ok) + spin(pkt_dev, pkt_dev->next_tx); + queue_map = skb_get_queue_mapping(pkt_dev->skb); txq = netdev_get_tx_queue(odev, queue_map); __netif_tx_lock_bh(txq); - if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) - pkt_dev->last_ok = 0; - else { - atomic_inc(&(pkt_dev->skb->users)); + atomic_inc(&(pkt_dev->skb->users)); - retry_now: + if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) + ret = NETDEV_TX_BUSY; + else ret = (*xmit)(pkt_dev->skb, odev); - switch (ret) { - case NETDEV_TX_OK: - txq_trans_update(txq); - pkt_dev->last_ok = 1; - pkt_dev->sofar++; - pkt_dev->seq_num++; - pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; - break; - case NETDEV_TX_LOCKED: - cpu_relax(); - goto retry_now; - default: /* Drivers are not supposed to return other values! */ - if (net_ratelimit()) - pr_info("pktgen: %s xmit error: %d\n", - odev->name, ret); - pkt_dev->errors++; - /* fallthru */ - case NETDEV_TX_BUSY: - /* Retry it next time */ - atomic_dec(&(pkt_dev->skb->users)); - pkt_dev->last_ok = 0; - } - - if (pkt_dev->delay) - pkt_dev->next_tx = ktime_add_ns(ktime_now(), - pkt_dev->delay); + + switch (ret) { + case NETDEV_TX_OK: + txq_trans_update(txq); + pkt_dev->last_ok = 1; + pkt_dev->sofar++; + pkt_dev->seq_num++; + pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; + break; + default: /* Drivers are not supposed to return other values! */ + if (net_ratelimit()) + pr_info("pktgen: %s xmit error: %d\n", + odev->name, ret); + pkt_dev->errors++; + /* fallthru */ + case NETDEV_TX_LOCKED: + case NETDEV_TX_BUSY: + /* Retry it next time */ + atomic_dec(&(pkt_dev->skb->users)); + pkt_dev->last_ok = 0; } __netif_tx_unlock_bh(txq); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { - while (atomic_read(&(pkt_dev->skb->users)) != 1) { - if (signal_pending(current)) - break; - idle(pkt_dev); - } + pktgen_wait_for_skb(pkt_dev); /* Done with this */ pktgen_stop_device(pkt_dev); @@ -3515,20 +3503,24 @@ static int pktgen_thread_worker(void *arg) while (!kthread_should_stop()) { pkt_dev = next_to_run(t); - if (!pkt_dev && - (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV)) - == 0) { - prepare_to_wait(&(t->queue), &wait, - TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); - finish_wait(&(t->queue), &wait); + if (unlikely(!pkt_dev && t->control == 0)) { + wait_event_interruptible_timeout(t->queue, + t->control != 0, + HZ/10); + continue; } __set_current_state(TASK_RUNNING); - if (pkt_dev) + if (likely(pkt_dev)) { pktgen_xmit(pkt_dev); + if (need_resched()) + pktgen_resched(pkt_dev); + else + cpu_relax(); + } + if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 1c6a5bb6f0c..6e1f085db06 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(ctl_table *, int, struct file *, +static int dn_forwarding_proc(ctl_table *, int, void __user *, size_t *, loff_t *); static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, @@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } static int dn_forwarding_proc(ctl_table *table, int write, - struct file *filep, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, dn_db = dev->dn_ptr; old = dn_db->parms.forwarding; - err = proc_dointvec(table, write, filep, buffer, lenp, ppos); + err = proc_dointvec(table, write, buffer, lenp, ppos); if ((err >= 0) && write) { if (dn_db->parms.forwarding < 0) diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 5bcd592ae6d..26b0ab1e9f5 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table, } static int dn_node_address_handler(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table, static int dn_def_dev_handler(ctl_table *table, int write, - struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 07336c6201f..e92f1fd28aa 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net) } static int devinet_conf_proc(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { struct ipv4_devconf *cnf = ctl->extra1; @@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table, } static int devinet_sysctl_forward(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *valp != val) { struct net *net = ctl->extra2; @@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } int ipv4_doint_and_flush(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; - int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); struct net *net = ctl->extra2; if (write && *valp != val) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d9645c94a06..41ada9904d3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -66,10 +66,7 @@ solution, but it supposes maintaing new variable in ALL skb, even if no tunneling is used. - Current solution: t->recursion lock breaks dead loops. It looks - like dev->tbusy flag, but I preferred new variable, because - the semantics is different. One day, when hard_start_xmit - will be multithreaded we will have to use skb->encapsulation. + Current solution: HARD_TX_LOCK lock breaks dead loops. @@ -678,11 +675,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __be32 dst; int mtu; - if (tunnel->recursion++) { - stats->collisions++; - goto tx_error; - } - if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -820,7 +812,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); - tunnel->recursion--; return NETDEV_TX_OK; } if (skb->sk) @@ -888,7 +879,6 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev nf_reset(skb); IPTUNNEL_XMIT(); - tunnel->recursion--; return NETDEV_TX_OK; tx_error_icmp: @@ -897,7 +887,6 @@ tx_error_icmp: tx_error: stats->tx_errors++; dev_kfree_skb(skb); - tunnel->recursion--; return NETDEV_TX_OK; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index fc7993e9061..5a0693576e8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -611,6 +611,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, * Check the arguments are allowable */ + if (optlen < sizeof(struct in_addr)) + goto e_inval; + err = -EFAULT; if (optlen >= sizeof(struct ip_mreqn)) { if (copy_from_user(&mreq, optval, sizeof(mreq))) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 62548cb0923..08ccd344de7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -402,11 +402,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be32 dst = tiph->daddr; int mtu; - if (tunnel->recursion++) { - stats->collisions++; - goto tx_error; - } - if (skb->protocol != htons(ETH_P_IP)) goto tx_error; @@ -485,7 +480,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); - tunnel->recursion--; return NETDEV_TX_OK; } if (skb->sk) @@ -523,7 +517,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset(skb); IPTUNNEL_XMIT(); - tunnel->recursion--; return NETDEV_TX_OK; tx_error_icmp: @@ -531,7 +524,6 @@ tx_error_icmp: tx_error: stats->tx_errors++; dev_kfree_skb(skb); - tunnel->recursion--; return NETDEV_TX_OK; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df934731453..bb419925202 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev) #ifdef CONFIG_SYSCTL static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, - struct file *filp, void __user *buffer, + void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { @@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, memcpy(&ctl, __ctl, sizeof(ctl)); ctl.data = &flush_delay; - proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); + proc_dointvec(&ctl, write, buffer, lenp, ppos); net = (struct net *)__ctl->extra1; rt_cache_flush(net, flush_delay); @@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old) } static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, - struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { int old = ip_rt_secret_interval; - int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); + int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); rt_secret_reschedule(old); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4710d219f06..2dcf04d9b00 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,7 +36,7 @@ static void set_local_port_range(int range[2]) } /* Validate changes from /proc interface. */ -static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, +static int ipv4_local_port_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, }; inet_get_local_port_range(range, range + 1); - ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) @@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, } -static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, +static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char val[TCP_CA_NAME_MAX]; @@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * tcp_get_default_congestion_control(val); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) ret = tcp_set_default_congestion_control(val); return ret; @@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, } static int proc_tcp_available_congestion_control(ctl_table *ctl, - int write, struct file * filp, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, if (!tbl.data) return -ENOMEM; tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); kfree(tbl.data); return ret; } static int proc_allowed_congestion_control(ctl_table *ctl, - int write, struct file * filp, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return -ENOMEM; tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); - ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 55f486d89c8..1fd0a3d775d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL static -int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, +int addrconf_sysctl_forward(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); @@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) } static -int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp, +int addrconf_sysctl_disable(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7d25bbe3211..c595bbe1ed9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1043,11 +1043,6 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &t->dev->stats; int ret; - if (t->recursion++) { - stats->collisions++; - goto tx_err; - } - switch (skb->protocol) { case htons(ETH_P_IP): ret = ip4ip6_tnl_xmit(skb, dev); @@ -1062,14 +1057,12 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (ret < 0) goto tx_err; - t->recursion--; return NETDEV_TX_OK; tx_err: stats->tx_errors++; stats->tx_dropped++; kfree_skb(skb); - t->recursion--; return NETDEV_TX_OK; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7015478797f..498b9b0b0fa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, } } -int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) +int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; @@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = proc_dointvec_jiffies(ctl, write, - filp, buffer, lenp, ppos); + buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) ret = proc_dointvec_ms_jiffies(ctl, write, - filp, buffer, lenp, ppos); + buffer, lenp, ppos); else ret = -1; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 77aecbe8ff6..d6fe7646a8f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = { #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, +int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; int delay = net->ipv6.sysctl.flush_delay; if (write) { - proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + proc_dointvec(ctl, write, buffer, lenp, ppos); fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); return 0; } else diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0ae4f644818..fcb53962884 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -626,11 +626,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct in6_addr *addr6; int addr_type; - if (tunnel->recursion++) { - stats->collisions++; - goto tx_error; - } - if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; @@ -753,7 +748,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ip_rt_put(rt); stats->tx_dropped++; dev_kfree_skb(skb); - tunnel->recursion--; return NETDEV_TX_OK; } if (skb->sk) @@ -794,7 +788,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, nf_reset(skb); IPTUNNEL_XMIT(); - tunnel->recursion--; return NETDEV_TX_OK; tx_error_icmp: @@ -802,7 +795,6 @@ tx_error_icmp: tx_error: stats->tx_errors++; dev_kfree_skb(skb); - tunnel->recursion--; return NETDEV_TX_OK; } diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 57f8817c397..5c86567e5a7 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100; /* 100us */ /* For other sysctl, I've no idea of the range. Maybe Dag could help * us on that - Jean II */ -static int do_devname(ctl_table *table, int write, struct file *filp, +static int do_devname(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dostring(table, write, filp, buffer, lenp, ppos); + ret = proc_dostring(table, write, buffer, lenp, ppos); if (ret == 0 && write) { struct ias_value *val; @@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp, } -static int do_discovery(ctl_table *table, int write, struct file *filp, +static int do_discovery(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret) return ret; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 039901109fa..71e10cabf81 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -90,8 +90,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } - /* set default value for buggy APs */ - if (!elems->tim || bss->dtim_period == 0) + /* set default value for buggy AP/no TIM element */ + if (bss->dtim_period == 0) bss->dtim_period = 1; bss->supp_rates_len = 0; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fba2892b99e..446e9bd4b4b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void) static int -proc_do_defense_mode(ctl_table *table, int write, struct file * filp, +proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; int val = *valp; int rc; - rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(table, write, buffer, lenp, ppos); if (write && (*valp != val)) { if ((*valp < 0) || (*valp > 3)) { /* Restore the correct value */ @@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp, static int -proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, +proc_do_sync_threshold(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, /* backup the value first */ memcpy(val, valp, sizeof(val)); - rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); + rc = proc_dointvec(table, write, buffer, lenp, ppos); if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { /* Restore the correct value */ memcpy(valp, val, sizeof(val)); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 4e620305f28..c93494fef8e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; static struct ctl_table_header *nf_log_dir_header; -static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, +static int nf_log_proc_dostring(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; @@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, table->data = "NONE"; else table->data = logger->name; - r = proc_dostring(table, write, filp, buffer, lenp, ppos); + r = proc_dostring(table, write, buffer, lenp, ppos); mutex_unlock(&nf_log_mutex); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 55180b99562..a4bafbf1509 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1609,6 +1609,16 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups) return err; } +void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group) +{ + struct sock *sk; + struct hlist_node *node; + struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; + + sk_for_each_bound(sk, node, &tbl->mc_list) + netlink_update_socket_mc(nlk_sk(sk), group, 0); +} + /** * netlink_clear_multicast_users - kick off multicast listeners * @@ -1619,15 +1629,8 @@ int netlink_change_ngroups(struct sock *sk, unsigned int groups) */ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) { - struct sock *sk; - struct hlist_node *node; - struct netlink_table *tbl = &nl_table[ksk->sk_protocol]; - netlink_table_grab(); - - sk_for_each_bound(sk, node, &tbl->mc_list) - netlink_update_socket_mc(nlk_sk(sk), group, 0); - + __netlink_clear_multicast_users(ksk, group); netlink_table_ungrab(); } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 566941e0336..44ff3f3810f 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -220,10 +220,12 @@ static void __genl_unregister_mc_group(struct genl_family *family, struct net *net; BUG_ON(grp->family != family); + netlink_table_grab(); rcu_read_lock(); for_each_net_rcu(net) - netlink_clear_multicast_users(net->genl_sock, grp->id); + __netlink_clear_multicast_users(net->genl_sock, grp->id); rcu_read_unlock(); + netlink_table_ungrab(); clear_bit(grp->id, mc_groups); list_del(&grp->list); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index a662e62a99c..f60c0c2aacb 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -168,6 +168,12 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, goto drop; } + /* Broadcast sending is not implemented */ + if (pn_addr(dst) == PNADDR_BROADCAST) { + err = -EOPNOTSUPP; + goto drop; + } + skb_reset_transport_header(skb); WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */ skb_push(skb, sizeof(struct phonethdr)); diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 7a4ee397d2f..07aa9f08d5f 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -113,6 +113,8 @@ void pn_sock_unhash(struct sock *sk) } EXPORT_SYMBOL(pn_sock_unhash); +static DEFINE_MUTEX(port_mutex); + static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) { struct sock *sk = sock->sk; @@ -140,9 +142,11 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) err = -EINVAL; /* attempt to rebind */ goto out; } + WARN_ON(sk_hashed(sk)); + mutex_lock(&port_mutex); err = sk->sk_prot->get_port(sk, pn_port(handle)); if (err) - goto out; + goto out_port; /* get_port() sets the port, bind() sets the address if applicable */ pn->sobject = pn_object(saddr, pn_port(pn->sobject)); @@ -150,6 +154,8 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) /* Enable RX on the socket */ sk->sk_prot->hash(sk); +out_port: + mutex_unlock(&port_mutex); out: release_sock(sk); return err; @@ -357,8 +363,6 @@ const struct proto_ops phonet_stream_ops = { }; EXPORT_SYMBOL(phonet_stream_ops); -static DEFINE_MUTEX(port_mutex); - /* allocate port for a socket */ int pn_sock_get_port(struct sock *sk, unsigned short sport) { @@ -370,9 +374,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) memset(&try_sa, 0, sizeof(struct sockaddr_pn)); try_sa.spn_family = AF_PHONET; - - mutex_lock(&port_mutex); - + WARN_ON(!mutex_is_locked(&port_mutex)); if (!sport) { /* search free port */ int port, pmin, pmax; @@ -401,8 +403,6 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) else sock_put(tmpsk); } - mutex_unlock(&port_mutex); - /* the port must be in use already */ return -EADDRINUSE; diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 7b5749ee276..2220f332232 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max) } while (read_seqretry(&local_port_range_lock, seq)); } -static int proc_local_port_range(ctl_table *table, int write, struct file *filp, +static int proc_local_port_range(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &local_port_range_max, }; - ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index c70dd7f5258..1db618f56ec 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/module.h> -#include <linux/utsname.h> #include <linux/sunrpc/clnt.h> #ifdef RPC_DEBUG diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a417d5ab5dd..38829e20500 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -640,10 +640,11 @@ EXPORT_SYMBOL_GPL(rpc_call_async); /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it - * @ops: RPC call ops + * @req: RPC request + * @tk_ops: RPC call ops */ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *tk_ops) + const struct rpc_call_ops *tk_ops) { struct rpc_task *task; struct xdr_buf *xbufp = &req->rq_snd_buf; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 858a443f418..49278f83036 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -860,7 +860,8 @@ static void rpc_clntdir_depopulate(struct dentry *dentry) /** * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs - * @path: path from the rpc_pipefs root to the new directory + * @dentry: dentry from the rpc_pipefs root to the new directory + * @name: &struct qstr for the name * @rpc_client: rpc client to associate with this directory * * This creates a directory at the given @path associated with diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 5231f7aaac0..42f9748ae09 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -56,7 +56,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, struct file *file, +static int proc_do_xprt(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file, } static int -proc_dodebug(ctl_table *table, int write, struct file *file, +proc_dodebug(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 87101177825..35fb68b9c8e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep; * current value. */ static int read_reset_stat(ctl_table *table, int write, - struct file *filp, void __user *buffer, size_t *lenp, + void __user *buffer, size_t *lenp, loff_t *ppos) { atomic_t *stat = (atomic_t *)table->data; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bee41546575..37c5475ba25 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -773,6 +773,7 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); xs_reset_transport(transport); + xprt->reestablish_timeout = 0; smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); @@ -1264,6 +1265,12 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) if (xprt->shutdown) goto out; + /* Any data means we had a useful conversation, so + * the we don't need to delay the next reconnect + */ + if (xprt->reestablish_timeout) + xprt->reestablish_timeout = 0; + /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ rd_desc.arg.data = xprt; do { @@ -2034,6 +2041,8 @@ static void xs_connect(struct rpc_task *task) &transport->connect_worker, xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index d16cd9ea4d0..bf725275eb8 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -26,11 +26,11 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, wdev->wext.connect.ie = wdev->wext.ie; wdev->wext.connect.ie_len = wdev->wext.ie_len; - wdev->wext.connect.privacy = wdev->wext.default_key != -1; if (wdev->wext.keys) { wdev->wext.keys->def = wdev->wext.default_key; wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; + wdev->wext.connect.privacy = true; } if (!wdev->wext.connect.ssid_len) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index c29be8f9024..4f9c1908593 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -83,11 +83,12 @@ TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/) # is automatically cleaned up. try-run = $(shell set -e; \ TMP="$(TMPOUT).$$$$.tmp"; \ + TMPO="$(TMPOUT).$$$$.o"; \ if ($(1)) >/dev/null 2>&1; \ then echo "$(2)"; \ else echo "$(3)"; \ fi; \ - rm -f "$$TMP") + rm -f "$$TMP" "$$TMPO") # as-option # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) @@ -105,12 +106,12 @@ as-instr = $(call try-run,\ # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) cc-option = $(call try-run,\ - $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2)) + $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2)) # cc-option-yn # Usage: flag := $(call cc-option-yn,-march=winchip-c6) cc-option-yn = $(call try-run,\ - $(CC) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n) + $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n) # cc-option-align # Prefix align with either -falign or -malign @@ -130,10 +131,15 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \ # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) cc-ifversion = $(shell [ $(call cc-version, $(CC)) $(1) $(2) ] && echo $(3)) +# cc-ldoption +# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) +cc-ldoption = $(call try-run,\ + $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2)) + # ld-option -# Usage: ldflags += $(call ld-option, -Wl$(comma)--hash-style=both) +# Usage: LDFLAGS += $(call ld-option, -X) ld-option = $(call try-run,\ - $(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2)) + $(CC) /dev/null -c -o "$$TMPO" ; $(LD) $(1) "$$TMPO" -o "$$TMP",$(1),$(2)) ###### diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 5c4b7a400c1..341b58902ff 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -206,7 +206,7 @@ cmd_modversions = \ endif ifdef CONFIG_FTRACE_MCOUNT_RECORD -cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ +cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_64BIT),64,32)" \ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; @@ -216,6 +216,7 @@ define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ $(cmd_modversions) \ + $(call echo-cmd,record_mcount) \ $(cmd_record_mcount) \ scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \ $(dot-target).tmp; \ @@ -269,7 +270,8 @@ targets += $(extra-y) $(MAKECMDGOALS) $(always) # Linker scripts preprocessor (.lds.S -> .lds) # --------------------------------------------------------------------------- quiet_cmd_cpp_lds_S = LDS $@ - cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $< + cmd_cpp_lds_S = $(CPP) $(cpp_flags) -P -C -U$(ARCH) \ + -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $< $(obj)/%.lds: $(src)/%.lds.S FORCE $(call if_changed_dep,cpp_lds_S) diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c index 99ca7a69868..79ab973fb43 100644 --- a/scripts/basic/docproc.c +++ b/scripts/basic/docproc.c @@ -71,7 +71,7 @@ FILELINE * docsection; static char *srctree, *kernsrctree; -void usage (void) +static void usage (void) { fprintf(stderr, "Usage: docproc {doc|depend} file\n"); fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n"); @@ -84,7 +84,7 @@ void usage (void) /* * Execute kernel-doc with parameters given in svec */ -void exec_kernel_doc(char **svec) +static void exec_kernel_doc(char **svec) { pid_t pid; int ret; @@ -129,7 +129,7 @@ struct symfile struct symfile symfilelist[MAXFILES]; int symfilecnt = 0; -void add_new_symbol(struct symfile *sym, char * symname) +static void add_new_symbol(struct symfile *sym, char * symname) { sym->symbollist = realloc(sym->symbollist, (sym->symbolcnt + 1) * sizeof(char *)); @@ -137,14 +137,14 @@ void add_new_symbol(struct symfile *sym, char * symname) } /* Add a filename to the list */ -struct symfile * add_new_file(char * filename) +static struct symfile * add_new_file(char * filename) { symfilelist[symfilecnt++].filename = strdup(filename); return &symfilelist[symfilecnt - 1]; } /* Check if file already are present in the list */ -struct symfile * filename_exist(char * filename) +static struct symfile * filename_exist(char * filename) { int i; for (i=0; i < symfilecnt; i++) @@ -157,20 +157,20 @@ struct symfile * filename_exist(char * filename) * List all files referenced within the template file. * Files are separated by tabs. */ -void adddep(char * file) { printf("\t%s", file); } -void adddep2(char * file, char * line) { line = line; adddep(file); } -void noaction(char * line) { line = line; } -void noaction2(char * file, char * line) { file = file; line = line; } +static void adddep(char * file) { printf("\t%s", file); } +static void adddep2(char * file, char * line) { line = line; adddep(file); } +static void noaction(char * line) { line = line; } +static void noaction2(char * file, char * line) { file = file; line = line; } /* Echo the line without further action */ -void printline(char * line) { printf("%s", line); } +static void printline(char * line) { printf("%s", line); } /* * Find all symbols in filename that are exported with EXPORT_SYMBOL & * EXPORT_SYMBOL_GPL (& EXPORT_SYMBOL_GPL_FUTURE implicitly). * All symbols located are stored in symfilelist. */ -void find_export_symbols(char * filename) +static void find_export_symbols(char * filename) { FILE * fp; struct symfile *sym; @@ -227,7 +227,7 @@ void find_export_symbols(char * filename) * intfunc uses -nofunction * extfunc uses -function */ -void docfunctions(char * filename, char * type) +static void docfunctions(char * filename, char * type) { int i,j; int symcnt = 0; @@ -258,15 +258,15 @@ void docfunctions(char * filename, char * type) fflush(stdout); free(vec); } -void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); } -void extfunc(char * filename) { docfunctions(filename, FUNCTION); } +static void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); } +static void extfunc(char * filename) { docfunctions(filename, FUNCTION); } /* * Document specific function(s) in a file. * Call kernel-doc with the following parameters: * kernel-doc -docbook -function function1 [-function function2] */ -void singfunc(char * filename, char * line) +static void singfunc(char * filename, char * line) { char *vec[200]; /* Enough for specific functions */ int i, idx = 0; @@ -297,7 +297,7 @@ void singfunc(char * filename, char * line) * Call kernel-doc with the following parameters: * kernel-doc -docbook -function "doc section" filename */ -void docsect(char *filename, char *line) +static void docsect(char *filename, char *line) { char *vec[6]; /* kerneldoc -docbook -function "section" file NULL */ char *s; @@ -324,7 +324,7 @@ void docsect(char *filename, char *line) * 5) Lines containing !P * 6) Default lines - lines not matching the above */ -void parse_file(FILE *infile) +static void parse_file(FILE *infile) { char line[MAXLINESZ]; char * s; diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 8ab44861168..6bf21f83837 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -124,7 +124,7 @@ char *target; char *depfile; char *cmdline; -void usage(void) +static void usage(void) { fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n"); exit(1); @@ -133,7 +133,7 @@ void usage(void) /* * Print out the commandline prefixed with cmd_<target filename> := */ -void print_cmdline(void) +static void print_cmdline(void) { printf("cmd_%s := %s\n\n", target, cmdline); } @@ -146,7 +146,7 @@ int len_config = 0; * Grow the configuration string to a desired length. * Usually the first growth is plenty. */ -void grow_config(int len) +static void grow_config(int len) { while (len_config + len > size_config) { if (size_config == 0) @@ -162,7 +162,7 @@ void grow_config(int len) /* * Lookup a value in the configuration string. */ -int is_defined_config(const char * name, int len) +static int is_defined_config(const char * name, int len) { const char * pconfig; const char * plast = str_config + len_config - len; @@ -178,7 +178,7 @@ int is_defined_config(const char * name, int len) /* * Add a new value to the configuration string. */ -void define_config(const char * name, int len) +static void define_config(const char * name, int len) { grow_config(len + 1); @@ -190,7 +190,7 @@ void define_config(const char * name, int len) /* * Clear the set of configuration strings. */ -void clear_config(void) +static void clear_config(void) { len_config = 0; define_config("", 0); @@ -199,7 +199,7 @@ void clear_config(void) /* * Record the use of a CONFIG_* word. */ -void use_config(char *m, int slen) +static void use_config(char *m, int slen) { char s[PATH_MAX]; char *p; @@ -220,7 +220,7 @@ void use_config(char *m, int slen) printf(" $(wildcard include/config/%s.h) \\\n", s); } -void parse_config_file(char *map, size_t len) +static void parse_config_file(char *map, size_t len) { int *end = (int *) (map + len); /* start at +1, so that p can never be < map */ @@ -254,7 +254,7 @@ void parse_config_file(char *map, size_t len) } /* test is s ends in sub */ -int strrcmp(char *s, char *sub) +static int strrcmp(char *s, char *sub) { int slen = strlen(s); int sublen = strlen(sub); @@ -265,7 +265,7 @@ int strrcmp(char *s, char *sub) return memcmp(s + slen - sublen, sub, sublen); } -void do_config_file(char *filename) +static void do_config_file(char *filename) { struct stat st; int fd; @@ -296,7 +296,7 @@ void do_config_file(char *filename) close(fd); } -void parse_dep_file(void *map, size_t len) +static void parse_dep_file(void *map, size_t len) { char *m = map; char *end = m + len; @@ -336,7 +336,7 @@ void parse_dep_file(void *map, size_t len) printf("$(deps_%s):\n", target); } -void print_deps(void) +static void print_deps(void) { struct stat st; int fd; @@ -368,7 +368,7 @@ void print_deps(void) close(fd); } -void traps(void) +static void traps(void) { static char test[] __attribute__((aligned(sizeof(int)))) = "CONF"; int *p = (int *)test; diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c index 3299ad7fc8c..2ef5d3f666b 100644 --- a/scripts/basic/hash.c +++ b/scripts/basic/hash.c @@ -21,7 +21,7 @@ static void usage(void) * http://www.cse.yorku.ca/~oz/hash.html */ -unsigned int djb2_hash(char *str) +static unsigned int djb2_hash(char *str) { unsigned long hash = 5381; int c; @@ -34,7 +34,7 @@ unsigned int djb2_hash(char *str) return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1)); } -unsigned int r5_hash(char *str) +static unsigned int r5_hash(char *str) { unsigned long hash = 0; int c; diff --git a/scripts/checkincludes.pl b/scripts/checkincludes.pl index 8e6b716c191..676ddc07d6f 100755 --- a/scripts/checkincludes.pl +++ b/scripts/checkincludes.pl @@ -1,24 +1,85 @@ #!/usr/bin/perl # -# checkincludes: Find files included more than once in (other) files. +# checkincludes: find/remove files included more than once +# # Copyright abandoned, 2000, Niels Kristian Bech Jensen <nkbj@image.dk>. +# Copyright 2009 Luis R. Rodriguez <mcgrof@gmail.com> +# +# This script checks for duplicate includes. It also has support +# to remove them in place. Note that this will not take into +# consideration macros so you should run this only if you know +# you do have real dups and do not have them under #ifdef's. You +# could also just review the results. + +sub usage { + print "Usage: checkincludes.pl [-r]\n"; + print "By default we just warn of duplicates\n"; + print "To remove duplicated includes in place use -r\n"; + exit 1; +} + +my $remove = 0; + +if ($#ARGV < 0) { + usage(); +} + +if ($#ARGV >= 1) { + if ($ARGV[0] =~ /^-/) { + if ($ARGV[0] eq "-r") { + $remove = 1; + shift; + } else { + usage(); + } + } +} foreach $file (@ARGV) { open(FILE, $file) or die "Cannot open $file: $!.\n"; my %includedfiles = (); + my @file_lines = (); while (<FILE>) { if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) { ++$includedfiles{$1}; } + push(@file_lines, $_); } - - foreach $filename (keys %includedfiles) { - if ($includedfiles{$filename} > 1) { - print "$file: $filename is included more than once.\n"; + + close(FILE); + + if (!$remove) { + foreach $filename (keys %includedfiles) { + if ($includedfiles{$filename} > 1) { + print "$file: $filename is included more than once.\n"; + } } + next; } + open(FILE,">$file") || die("Cannot write to $file: $!"); + + my $dups = 0; + foreach (@file_lines) { + if (m/^\s*#\s*include\s*[<"](\S*)[>"]/o) { + foreach $filename (keys %includedfiles) { + if ($1 eq $filename) { + if ($includedfiles{$filename} > 1) { + $includedfiles{$filename}--; + $dups++; + } else { + print FILE $_; + } + } + } + } else { + print FILE $_; + } + } + if ($dups > 0) { + print "$file: removed $dups duplicate includes\n"; + } close(FILE); } diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 3baaaecd6b1..9960d1c303f 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -38,14 +38,14 @@ static int conf_cnt; static char line[128]; static struct menu *rootEntry; -static char nohelp_text[] = N_("Sorry, no help available for this option yet.\n"); - -static const char *get_help(struct menu *menu) +static void print_help(struct menu *menu) { - if (menu_has_help(menu)) - return _(menu_get_help(menu)); - else - return nohelp_text; + struct gstr help = str_new(); + + menu_get_ext_help(menu, &help); + + printf("\n%s\n", str_get(&help)); + str_free(&help); } static void strip(char *str) @@ -121,7 +121,7 @@ static int conf_askvalue(struct symbol *sym, const char *def) return 1; } -int conf_string(struct menu *menu) +static int conf_string(struct menu *menu) { struct symbol *sym = menu->sym; const char *def; @@ -140,7 +140,7 @@ int conf_string(struct menu *menu) case '?': /* print help */ if (line[1] == '\n') { - printf("\n%s\n", get_help(menu)); + print_help(menu); def = NULL; break; } @@ -220,7 +220,7 @@ static int conf_sym(struct menu *menu) if (sym_set_tristate_value(sym, newval)) return 0; help: - printf("\n%s\n", get_help(menu)); + print_help(menu); } } @@ -307,7 +307,7 @@ static int conf_choice(struct menu *menu) fgets(line, 128, stdin); strip(line); if (line[0] == '?') { - printf("\n%s\n", get_help(menu)); + print_help(menu); continue; } if (!line[0]) @@ -331,7 +331,7 @@ static int conf_choice(struct menu *menu) if (!child) continue; if (line[strlen(line) - 1] == '?') { - printf("\n%s\n", get_help(child)); + print_help(child); continue; } sym_set_choice_value(sym, child->sym); diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index a04da3459f0..b55e72ff2fc 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -560,7 +560,7 @@ int conf_write(const char *name) return 0; } -int conf_split_config(void) +static int conf_split_config(void) { const char *name; char path[128]; diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index 579ece4fa58..edd3f39a080 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -348,7 +348,7 @@ struct expr *expr_trans_bool(struct expr *e) /* * e1 || e2 -> ? */ -struct expr *expr_join_or(struct expr *e1, struct expr *e2) +static struct expr *expr_join_or(struct expr *e1, struct expr *e2) { struct expr *tmp; struct symbol *sym1, *sym2; @@ -412,7 +412,7 @@ struct expr *expr_join_or(struct expr *e1, struct expr *e2) return NULL; } -struct expr *expr_join_and(struct expr *e1, struct expr *e2) +static struct expr *expr_join_and(struct expr *e1, struct expr *e2) { struct expr *tmp; struct symbol *sym1, *sym2; @@ -1098,6 +1098,8 @@ void expr_fprint(struct expr *e, FILE *out) static void expr_print_gstr_helper(void *data, struct symbol *sym, const char *str) { str_append((struct gstr*)data, str); + if (sym) + str_printf((struct gstr*)data, " [=%s]", sym_get_string_value(sym)); } void expr_gstr_print(struct expr *e, struct gstr *gs) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 199b22bb49e..65464366fe3 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -456,19 +456,9 @@ static void text_insert_help(struct menu *menu) GtkTextBuffer *buffer; GtkTextIter start, end; const char *prompt = _(menu_get_prompt(menu)); - gchar *name; - const char *help; + struct gstr help = str_new(); - help = menu_get_help(menu); - - /* Gettextize if the help text not empty */ - if ((help != 0) && (help[0] != 0)) - help = _(help); - - if (menu->sym && menu->sym->name) - name = g_strdup_printf(menu->sym->name); - else - name = g_strdup(""); + menu_get_ext_help(menu, &help); buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_w)); gtk_text_buffer_get_bounds(buffer, &start, &end); @@ -478,14 +468,11 @@ static void text_insert_help(struct menu *menu) gtk_text_buffer_get_end_iter(buffer, &end); gtk_text_buffer_insert_with_tags(buffer, &end, prompt, -1, tag1, NULL); - gtk_text_buffer_insert_at_cursor(buffer, " ", 1); - gtk_text_buffer_get_end_iter(buffer, &end); - gtk_text_buffer_insert_with_tags(buffer, &end, name, -1, tag1, - NULL); gtk_text_buffer_insert_at_cursor(buffer, "\n\n", 2); gtk_text_buffer_get_end_iter(buffer, &end); - gtk_text_buffer_insert_with_tags(buffer, &end, help, -1, tag2, + gtk_text_buffer_insert_with_tags(buffer, &end, str_get(&help), -1, tag2, NULL); + str_free(&help); } diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 803233fdd6d..b1c86c19292 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -547,7 +547,7 @@ <property name="headers_visible">True</property> <property name="rules_hint">False</property> <property name="reorderable">False</property> - <property name="enable_search">True</property> + <property name="enable_search">False</property> <signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:58:22 GMT"/> <signal name="button_press_event" handler="on_treeview1_button_press_event" last_modification_time="Sun, 12 Jan 2003 16:03:52 GMT"/> <signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 16:11:44 GMT"/> @@ -582,7 +582,7 @@ <property name="headers_visible">True</property> <property name="rules_hint">False</property> <property name="reorderable">False</property> - <property name="enable_search">True</property> + <property name="enable_search">False</property> <signal name="cursor_changed" handler="on_treeview2_cursor_changed" last_modification_time="Sun, 12 Jan 2003 15:57:55 GMT"/> <signal name="button_press_event" handler="on_treeview2_button_press_event" last_modification_time="Sun, 12 Jan 2003 15:57:58 GMT"/> <signal name="key_press_event" handler="on_treeview2_key_press_event" last_modification_time="Sun, 12 Jan 2003 15:58:01 GMT"/> diff --git a/scripts/kconfig/kxgettext.c b/scripts/kconfig/kxgettext.c index 8d9ce22b0fc..dcc3fcc0cc9 100644 --- a/scripts/kconfig/kxgettext.c +++ b/scripts/kconfig/kxgettext.c @@ -166,7 +166,7 @@ static int message__add(const char *msg, char *option, char *file, int lineno) return rc; } -void menu_build_message_list(struct menu *menu) +static void menu_build_message_list(struct menu *menu) { struct menu *child; @@ -211,7 +211,7 @@ static void message__print_gettext_msgid_msgstr(struct message *self) "msgstr \"\"\n", self->msg); } -void menu__xgettext(void) +static void menu__xgettext(void) { struct message *m = message__list; diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h index 8e69461313d..ffeb532b2cf 100644 --- a/scripts/kconfig/lkc_proto.h +++ b/scripts/kconfig/lkc_proto.h @@ -17,6 +17,8 @@ P(menu_get_root_menu,struct menu *,(struct menu *menu)); P(menu_get_parent_menu,struct menu *,(struct menu *menu)); P(menu_has_help,bool,(struct menu *menu)); P(menu_get_help,const char *,(struct menu *menu)); +P(get_symbol_str,void,(struct gstr *r, struct symbol *sym)); +P(menu_get_ext_help,void,(struct menu *menu, struct gstr *help)); /* symbol.c */ P(symbol_hash,struct symbol *,[SYMBOL_HASHSIZE]); diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 25b60bc117f..d8295357358 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -199,8 +199,6 @@ inputbox_instructions_string[] = N_( setmod_text[] = N_( "This feature depends on another which has been configured as a module.\n" "As a result, this feature will be built as a module."), -nohelp_text[] = N_( - "There is no help available for this kernel option.\n"), load_config_text[] = N_( "Enter the name of the configuration file you wish to load. " "Accept the name shown to restore the configuration you " @@ -284,66 +282,6 @@ static void show_textbox(const char *title, const char *text, int r, int c); static void show_helptext(const char *title, const char *text); static void show_help(struct menu *menu); -static void get_prompt_str(struct gstr *r, struct property *prop) -{ - int i, j; - struct menu *submenu[8], *menu; - - str_printf(r, _("Prompt: %s\n"), _(prop->text)); - str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name, - prop->menu->lineno); - if (!expr_is_yes(prop->visible.expr)) { - str_append(r, _(" Depends on: ")); - expr_gstr_print(prop->visible.expr, r); - str_append(r, "\n"); - } - menu = prop->menu->parent; - for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) - submenu[i++] = menu; - if (i > 0) { - str_printf(r, _(" Location:\n")); - for (j = 4; --i >= 0; j += 2) { - menu = submenu[i]; - str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu))); - if (menu->sym) { - str_printf(r, " (%s [=%s])", menu->sym->name ? - menu->sym->name : _("<choice>"), - sym_get_string_value(menu->sym)); - } - str_append(r, "\n"); - } - } -} - -static void get_symbol_str(struct gstr *r, struct symbol *sym) -{ - bool hit; - struct property *prop; - - if (sym && sym->name) - str_printf(r, "Symbol: %s [=%s]\n", sym->name, - sym_get_string_value(sym)); - for_all_prompts(sym, prop) - get_prompt_str(r, prop); - hit = false; - for_all_properties(sym, prop, P_SELECT) { - if (!hit) { - str_append(r, " Selects: "); - hit = true; - } else - str_printf(r, " && "); - expr_gstr_print(prop->expr, r); - } - if (hit) - str_append(r, "\n"); - if (sym->rev_dep.expr) { - str_append(r, _(" Selected by: ")); - expr_gstr_print(sym->rev_dep.expr, r); - str_append(r, "\n"); - } - str_append(r, "\n\n"); -} - static struct gstr get_relations_str(struct symbol **sym_arr) { struct symbol *sym; @@ -699,19 +637,9 @@ static void show_helptext(const char *title, const char *text) static void show_help(struct menu *menu) { struct gstr help = str_new(); - struct symbol *sym = menu->sym; - - if (menu_has_help(menu)) - { - if (sym->name) { - str_printf(&help, "CONFIG_%s:\n\n", sym->name); - str_append(&help, _(menu_get_help(menu))); - str_append(&help, "\n"); - } - } else { - str_append(&help, nohelp_text); - } - get_symbol_str(&help, sym); + + menu_get_ext_help(menu, &help); + show_helptext(_(menu_get_prompt(menu)), str_get(&help)); str_free(&help); } diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 07ff8d105c9..059a2465c57 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -9,6 +9,9 @@ #define LKC_DIRECT_LINK #include "lkc.h" +static const char nohelp_text[] = N_( + "There is no help available for this kernel option.\n"); + struct menu rootmenu; static struct menu **last_entry_ptr; @@ -74,7 +77,7 @@ void menu_end_menu(void) current_menu = current_menu->parent; } -struct expr *menu_check_dep(struct expr *e) +static struct expr *menu_check_dep(struct expr *e) { if (!e) return e; @@ -184,7 +187,7 @@ static int menu_range_valid_sym(struct symbol *sym, struct symbol *sym2) (sym2->type == S_UNKNOWN && sym_string_valid(sym, sym2->name)); } -void sym_check_prop(struct symbol *sym) +static void sym_check_prop(struct symbol *sym) { struct property *prop; struct symbol *sym2; @@ -451,3 +454,80 @@ const char *menu_get_help(struct menu *menu) else return ""; } + +static void get_prompt_str(struct gstr *r, struct property *prop) +{ + int i, j; + struct menu *submenu[8], *menu; + + str_printf(r, _("Prompt: %s\n"), _(prop->text)); + str_printf(r, _(" Defined at %s:%d\n"), prop->menu->file->name, + prop->menu->lineno); + if (!expr_is_yes(prop->visible.expr)) { + str_append(r, _(" Depends on: ")); + expr_gstr_print(prop->visible.expr, r); + str_append(r, "\n"); + } + menu = prop->menu->parent; + for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) + submenu[i++] = menu; + if (i > 0) { + str_printf(r, _(" Location:\n")); + for (j = 4; --i >= 0; j += 2) { + menu = submenu[i]; + str_printf(r, "%*c-> %s", j, ' ', _(menu_get_prompt(menu))); + if (menu->sym) { + str_printf(r, " (%s [=%s])", menu->sym->name ? + menu->sym->name : _("<choice>"), + sym_get_string_value(menu->sym)); + } + str_append(r, "\n"); + } + } +} + +void get_symbol_str(struct gstr *r, struct symbol *sym) +{ + bool hit; + struct property *prop; + + if (sym && sym->name) + str_printf(r, "Symbol: %s [=%s]\n", sym->name, + sym_get_string_value(sym)); + for_all_prompts(sym, prop) + get_prompt_str(r, prop); + hit = false; + for_all_properties(sym, prop, P_SELECT) { + if (!hit) { + str_append(r, " Selects: "); + hit = true; + } else + str_printf(r, " && "); + expr_gstr_print(prop->expr, r); + } + if (hit) + str_append(r, "\n"); + if (sym->rev_dep.expr) { + str_append(r, _(" Selected by: ")); + expr_gstr_print(sym->rev_dep.expr, r); + str_append(r, "\n"); + } + str_append(r, "\n\n"); +} + +void menu_get_ext_help(struct menu *menu, struct gstr *help) +{ + struct symbol *sym = menu->sym; + + if (menu_has_help(menu)) { + if (sym->name) { + str_printf(help, "CONFIG_%s:\n\n", sym->name); + str_append(help, _(menu_get_help(menu))); + str_append(help, "\n"); + } + } else { + str_append(help, nohelp_text); + } + if (sym) + get_symbol_str(help, sym); +} diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc index ce7d508c752..00c51507cfc 100644 --- a/scripts/kconfig/qconf.cc +++ b/scripts/kconfig/qconf.cc @@ -1042,12 +1042,10 @@ void ConfigInfoView::menuInfo(void) if (showDebug()) debug = debug_info(sym); - help = menu_get_help(menu); - /* Gettextize if the help text not empty */ - if (help.isEmpty()) - help = print_filter(menu_get_help(menu)); - else - help = print_filter(_(menu_get_help(menu))); + struct gstr help_gstr = str_new(); + menu_get_ext_help(menu, &help_gstr); + help = print_filter(str_get(&help_gstr)); + str_free(&help_gstr); } else if (menu->prompt) { head += "<big><b>"; head += print_filter(_(menu->prompt->text)); diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 18f3e5c3363..6c8fbbb66eb 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -36,7 +36,7 @@ tristate modules_val; struct expr *sym_env_list; -void sym_add_default(struct symbol *sym, const char *def) +static void sym_add_default(struct symbol *sym, const char *def) { struct property *prop = prop_alloc(P_DEFAULT, sym); @@ -125,7 +125,7 @@ struct property *sym_get_default_prop(struct symbol *sym) return NULL; } -struct property *sym_get_range_prop(struct symbol *sym) +static struct property *sym_get_range_prop(struct symbol *sym) { struct property *prop; @@ -943,7 +943,7 @@ const char *prop_get_type_name(enum prop_type type) return "unknown"; } -void prop_add_env(const char *env) +static void prop_add_env(const char *env) { struct symbol *sym, *sym2; struct property *prop; diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl index 89774011965..5f0fcb712e2 100644 --- a/scripts/markup_oops.pl +++ b/scripts/markup_oops.pl @@ -184,10 +184,7 @@ if ($target eq "0") { # if it's a module, we need to find the .ko file and calculate a load offset if ($module ne "") { - my $dir = dirname($filename); - $dir = $dir . "/"; - my $mod = $module . ".ko"; - my $modulefile = `find $dir -name $mod | head -1`; + my $modulefile = `modinfo $module | grep '^filename:' | awk '{ print \$2 }'`; chomp($modulefile); $filename = $modulefile; if ($filename eq "") { diff --git a/scripts/tags.sh b/scripts/tags.sh index 4a34ec591e8..d52f7a01557 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -101,7 +101,8 @@ exuberant() -I ____cacheline_aligned_in_smp \ -I ____cacheline_internodealigned_in_smp \ -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \ - --extra=+f --c-kinds=+px \ + -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \ + --extra=+f --c-kinds=-px \ --regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \ --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' diff --git a/security/device_cgroup.c b/security/device_cgroup.c index b8186bac8b7..6cf8fd2b79e 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -61,7 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) struct cgroup_subsys devices_subsys; static int devcgroup_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, struct task_struct *task) + struct cgroup *new_cgroup, struct task_struct *task, + bool threadgroup) { if (current != task && !capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/security/keys/gc.c b/security/keys/gc.c index 485fc6233c3..4770be375ff 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -169,9 +169,9 @@ static void key_garbage_collector(struct work_struct *work) /* trawl through the keys looking for keyrings */ for (;;) { - if (key->expiry > now && key->expiry < new_timer) { + if (key->expiry > limit && key->expiry < new_timer) { kdebug("will expire %x in %ld", - key_serial(key), key->expiry - now); + key_serial(key), key->expiry - limit); new_timer = key->expiry; } diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 500aad0ebd6..3bb90b6f1dd 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -187,7 +187,7 @@ static inline void print_ipv6_addr(struct audit_buffer *ab, char *name1, char *name2) { if (!ipv6_addr_any(addr)) - audit_log_format(ab, " %s=%pI6", name1, addr); + audit_log_format(ab, " %s=%pI6c", name1, addr); if (port) audit_log_format(ab, " %s=%d", name2, ntohs(port)); } diff --git a/security/min_addr.c b/security/min_addr.c index 14cc7b3b8d0..c844eed7915 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -28,12 +28,12 @@ static void update_mmap_min_addr(void) * sysctl handler which just sets dac_mmap_min_addr = the new value and then * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly */ -int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, +int mmap_min_addr_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); update_mmap_min_addr(); diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 1ed0f076aad..b4b5da1c0a4 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -868,8 +868,19 @@ u32 avc_policy_seqno(void) void avc_disable(void) { - avc_flush(); - synchronize_rcu(); - if (avc_node_cachep) - kmem_cache_destroy(avc_node_cachep); + /* + * If you are looking at this because you have realized that we are + * not destroying the avc_node_cachep it might be easy to fix, but + * I don't know the memory barrier semantics well enough to know. It's + * possible that some other task dereferenced security_ops when + * it still pointed to selinux operations. If that is the case it's + * possible that it is about to use the avc and is about to need the + * avc_node_cachep. I know I could wrap the security.c security_ops call + * in an rcu_lock, but seriously, it's not worth it. Instead I just flush + * the cache and get that memory back. + */ + if (avc_node_cachep) { + avc_flush(); + /* kmem_cache_destroy(avc_node_cachep); */ + } } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 417f7c99452..bb230d5d708 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) /* Wake up the parent if it is waiting so that it can recheck * wait permission to the new task SID. */ read_lock(&tasklist_lock); - wake_up_interruptible(¤t->real_parent->signal->wait_chldexit); + __wake_up_parent(current, current->real_parent); read_unlock(&tasklist_lock); } diff --git a/usr/.gitignore b/usr/.gitignore index 69b2e89fa16..8e48117a3f3 100644 --- a/usr/.gitignore +++ b/usr/.gitignore @@ -4,5 +4,7 @@ gen_init_cpio initramfs_data.cpio initramfs_data.cpio.gz +initramfs_data.cpio.bz2 +initramfs_data.cpio.lzma initramfs_list include diff --git a/usr/Makefile b/usr/Makefile index 245145a99c1..1e6a9e4a72c 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -6,7 +6,7 @@ klibcdirs:; PHONY += klibcdirs -# Gzip, but no bzip2 +# Gzip suffix_$(CONFIG_INITRAMFS_COMPRESSION_GZIP) = .gz # Bzip2 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 897bff3b7df..034a798b043 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -738,8 +738,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) bool called = true; struct kvm_vcpu *vcpu; - if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) - cpumask_clear(cpus); + zalloc_cpumask_var(&cpus, GFP_ATOMIC); spin_lock(&kvm->requests_lock); me = smp_processor_id(); |