aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-power17
-rw-r--r--Documentation/DocBook/Makefile2
-rw-r--r--Documentation/DocBook/filesystems.tmpl300
-rw-r--r--Documentation/DocBook/journal-api.tmpl333
-rw-r--r--Documentation/accounting/getdelays.c2
-rw-r--r--Documentation/kernel-doc-nano-HOWTO.txt2
-rw-r--r--Documentation/power/interface.txt13
-rw-r--r--arch/i386/kernel/acpi/boot.c2
-rw-r--r--arch/um/include/sysdep-i386/barrier.h9
-rw-r--r--arch/um/include/sysdep-x86_64/barrier.h7
-rw-r--r--arch/um/os-Linux/process.c1
-rw-r--r--arch/um/os-Linux/signal.c31
-rw-r--r--arch/um/os-Linux/skas/process.c2
-rw-r--r--arch/um/os-Linux/tls.c2
-rw-r--r--block/ll_rw_blk.c9
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c2
-rw-r--r--drivers/edac/edac_mc.c45
-rw-r--r--drivers/ide/pci/amd74xx.c7
-rw-r--r--drivers/isdn/hysdn/hysdn_sched.c19
-rw-r--r--drivers/md/md.c2
-rw-r--r--drivers/misc/lkdtm.c24
-rw-r--r--drivers/spi/spi.c1
-rw-r--r--fs/cifs/file.c23
-rw-r--r--fs/compat.c20
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/fuse/file.c9
-rw-r--r--fs/gfs2/ops_address.c7
-rw-r--r--fs/nfsd/nfs4recover.c2
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/xattr.c13
-rw-r--r--include/asm-powerpc/systbl.h2
-rw-r--r--include/asm-powerpc/unistd.h2
-rw-r--r--include/linux/compat.h4
-rw-r--r--include/linux/kernel.h2
-rw-r--r--include/linux/pm.h4
-rw-r--r--include/linux/ufs_fs.h2
-rw-r--r--ipc/msg.c1
-rw-r--r--ipc/sem.c1
-rw-r--r--ipc/shm.c1
-rw-r--r--ipc/util.c2
-rw-r--r--ipc/util.h12
-rw-r--r--kernel/compat.c33
-rw-r--r--kernel/futex.c7
-rw-r--r--kernel/power/disk.c37
-rw-r--r--kernel/printk.c21
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--mm/migrate.c3
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/readahead.c2
-rw-r--r--mm/slab.c2
-rw-r--r--scripts/basic/docproc.c2
51 files changed, 595 insertions, 457 deletions
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index d882f809387..dcff4d0623a 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -21,7 +21,7 @@ Description:
these states.
What: /sys/power/disk
-Date: August 2006
+Date: September 2006
Contact: Rafael J. Wysocki <rjw@sisk.pl>
Description:
The /sys/power/disk file controls the operating mode of the
@@ -39,6 +39,19 @@ Description:
'reboot' - the memory image will be saved by the kernel and
the system will be rebooted.
+ Additionally, /sys/power/disk can be used to turn on one of the
+ two testing modes of the suspend-to-disk mechanism: 'testproc'
+ or 'test'. If the suspend-to-disk mechanism is in the
+ 'testproc' mode, writing 'disk' to /sys/power/state will cause
+ the kernel to disable nonboot CPUs and freeze tasks, wait for 5
+ seconds, unfreeze tasks and enable nonboot CPUs. If it is in
+ the 'test' mode, writing 'disk' to /sys/power/state will cause
+ the kernel to disable nonboot CPUs and freeze tasks, shrink
+ memory, suspend devices, wait for 5 seconds, resume devices,
+ unfreeze tasks and enable nonboot CPUs. Then, we are able to
+ look in the log messages and work out, for example, which code
+ is being slow and which device drivers are misbehaving.
+
The suspend-to-disk method may be chosen by writing to this
file one of the accepted strings:
@@ -46,6 +59,8 @@ Description:
'platform'
'shutdown'
'reboot'
+ 'testproc'
+ 'test'
It will only change to 'firmware' or 'platform' if the system
supports that.
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 3bf5086574b..db9499adbed 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -9,7 +9,7 @@
DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml \
- kernel-api.xml filesystems.xml journal-api.xml lsm.xml usb.xml \
+ kernel-api.xml filesystems.xml lsm.xml usb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
genericirq.xml
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 4785032fb6e..39fa2aba7f9 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -98,4 +98,304 @@
</sect1>
</chapter>
+ <chapter id="LinuxJDBAPI">
+ <chapterinfo>
+ <title>The Linux Journalling API</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Roger</firstname>
+ <surname>Gammans</surname>
+ <affiliation>
+ <address>
+ <email>rgammans@computer-surgery.co.uk</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <authorgroup>
+ <author>
+ <firstname>Stephen</firstname>
+ <surname>Tweedie</surname>
+ <affiliation>
+ <address>
+ <email>sct@redhat.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2002</year>
+ <holder>Roger Gammans</holder>
+ </copyright>
+ </chapterinfo>
+
+ <title>The Linux Journalling API</title>
+
+ <sect1>
+ <title>Overview</title>
+ <sect2>
+ <title>Details</title>
+<para>
+The journalling layer is easy to use. You need to
+first of all create a journal_t data structure. There are
+two calls to do this dependent on how you decide to allocate the physical
+media on which the journal resides. The journal_init_inode() call
+is for journals stored in filesystem inodes, or the journal_init_dev()
+call can be use for journal stored on a raw device (in a continuous range
+of blocks). A journal_t is a typedef for a struct pointer, so when
+you are finally finished make sure you call journal_destroy() on it
+to free up any used kernel memory.
+</para>
+
+<para>
+Once you have got your journal_t object you need to 'mount' or load the journal
+file, unless of course you haven't initialised it yet - in which case you
+need to call journal_create().
+</para>
+
+<para>
+Most of the time however your journal file will already have been created, but
+before you load it you must call journal_wipe() to empty the journal file.
+Hang on, you say , what if the filesystem wasn't cleanly umount()'d . Well, it is the
+job of the client file system to detect this and skip the call to journal_wipe().
+</para>
+
+<para>
+In either case the next call should be to journal_load() which prepares the
+journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery()
+for you if it detects any outstanding transactions in the journal and similarly
+journal_load() will call journal_recover() if necessary.
+I would advise reading fs/ext3/super.c for examples on this stage.
+[RGG: Why is the journal_wipe() call necessary - doesn't this needlessly
+complicate the API. Or isn't a good idea for the journal layer to hide
+dirty mounts from the client fs]
+</para>
+
+<para>
+Now you can go ahead and start modifying the underlying
+filesystem. Almost.
+</para>
+
+<para>
+
+You still need to actually journal your filesystem changes, this
+is done by wrapping them into transactions. Additionally you
+also need to wrap the modification of each of the buffers
+with calls to the journal layer, so it knows what the modifications
+you are actually making are. To do this use journal_start() which
+returns a transaction handle.
+</para>
+
+<para>
+journal_start()
+and its counterpart journal_stop(), which indicates the end of a transaction
+are nestable calls, so you can reenter a transaction if necessary,
+but remember you must call journal_stop() the same number of times as
+journal_start() before the transaction is completed (or more accurately
+leaves the update phase). Ext3/VFS makes use of this feature to simplify
+quota support.
+</para>
+
+<para>
+Inside each transaction you need to wrap the modifications to the
+individual buffers (blocks). Before you start to modify a buffer you
+need to call journal_get_{create,write,undo}_access() as appropriate,
+this allows the journalling layer to copy the unmodified data if it
+needs to. After all the buffer may be part of a previously uncommitted
+transaction.
+At this point you are at last ready to modify a buffer, and once
+you are have done so you need to call journal_dirty_{meta,}data().
+Or if you've asked for access to a buffer you now know is now longer
+required to be pushed back on the device you can call journal_forget()
+in much the same way as you might have used bforget() in the past.
+</para>
+
+<para>
+A journal_flush() may be called at any time to commit and checkpoint
+all your transactions.
+</para>
+
+<para>
+Then at umount time , in your put_super() (2.4) or write_super() (2.5)
+you can then call journal_destroy() to clean up your in-core journal object.
+</para>
+
+<para>
+Unfortunately there a couple of ways the journal layer can cause a deadlock.
+The first thing to note is that each task can only have
+a single outstanding transaction at any one time, remember nothing
+commits until the outermost journal_stop(). This means
+you must complete the transaction at the end of each file/inode/address
+etc. operation you perform, so that the journalling system isn't re-entered
+on another journal. Since transactions can't be nested/batched
+across differing journals, and another filesystem other than
+yours (say ext3) may be modified in a later syscall.
+</para>
+
+<para>
+The second case to bear in mind is that journal_start() can
+block if there isn't enough space in the journal for your transaction
+(based on the passed nblocks param) - when it blocks it merely(!) needs to
+wait for transactions to complete and be committed from other tasks,
+so essentially we are waiting for journal_stop(). So to avoid
+deadlocks you must treat journal_start/stop() as if they
+were semaphores and include them in your semaphore ordering rules to prevent
+deadlocks. Note that journal_extend() has similar blocking behaviour to
+journal_start() so you can deadlock here just as easily as on journal_start().
+</para>
+
+<para>
+Try to reserve the right number of blocks the first time. ;-). This will
+be the maximum number of blocks you are going to touch in this transaction.
+I advise having a look at at least ext3_jbd.h to see the basis on which
+ext3 uses to make these decisions.
+</para>
+
+<para>
+Another wriggle to watch out for is your on-disk block allocation strategy.
+why? Because, if you undo a delete, you need to ensure you haven't reused any
+of the freed blocks in a later transaction. One simple way of doing this
+is make sure any blocks you allocate only have checkpointed transactions
+listed against them. Ext3 does this in ext3_test_allocatable().
+</para>
+
+<para>
+Lock is also providing through journal_{un,}lock_updates(),
+ext3 uses this when it wants a window with a clean and stable fs for a moment.
+eg.
+</para>
+
+<programlisting>
+
+ journal_lock_updates() //stop new stuff happening..
+ journal_flush() // checkpoint everything.
+ ..do stuff on stable fs
+ journal_unlock_updates() // carry on with filesystem use.
+</programlisting>
+
+<para>
+The opportunities for abuse and DOS attacks with this should be obvious,
+if you allow unprivileged userspace to trigger codepaths containing these
+calls.
+</para>
+
+<para>
+A new feature of jbd since 2.5.25 is commit callbacks with the new
+journal_callback_set() function you can now ask the journalling layer
+to call you back when the transaction is finally committed to disk, so that
+you can do some of your own management. The key to this is the journal_callback
+struct, this maintains the internal callback information but you can
+extend it like this:-
+</para>
+<programlisting>
+ struct myfs_callback_s {
+ //Data structure element required by jbd..
+ struct journal_callback for_jbd;
+ // Stuff for myfs allocated together.
+ myfs_inode* i_commited;
+
+ }
+</programlisting>
+
+<para>
+this would be useful if you needed to know when data was committed to a
+particular inode.
+</para>
+
+ </sect2>
+
+ <sect2>
+ <title>Summary</title>
+<para>
+Using the journal is a matter of wrapping the different context changes,
+being each mount, each modification (transaction) and each changed buffer
+to tell the journalling layer about them.
+</para>
+
+<para>
+Here is a some pseudo code to give you an idea of how it works, as
+an example.
+</para>
+
+<programlisting>
+ journal_t* my_jnrl = journal_create();
+ journal_init_{dev,inode}(jnrl,...)
+ if (clean) journal_wipe();
+ journal_load();
+
+ foreach(transaction) { /*transactions must be
+ completed before
+ a syscall returns to
+ userspace*/
+
+ handle_t * xct=journal_start(my_jnrl);
+ foreach(bh) {
+ journal_get_{create,write,undo}_access(xact,bh);
+ if ( myfs_modify(bh) ) { /* returns true
+ if makes changes */
+ journal_dirty_{meta,}data(xact,bh);
+ } else {
+ journal_forget(bh);
+ }
+ }
+ journal_stop(xct);
+ }
+ journal_destroy(my_jrnl);
+</programlisting>
+ </sect2>
+
+ </sect1>
+
+ <sect1>
+ <title>Data Types</title>
+ <para>
+ The journalling layer uses typedefs to 'hide' the concrete definitions
+ of the structures used. As a client of the JBD layer you can
+ just rely on the using the pointer as a magic cookie of some sort.
+
+ Obviously the hiding is not enforced as this is 'C'.
+ </para>
+ <sect2><title>Structures</title>
+!Iinclude/linux/jbd.h
+ </sect2>
+ </sect1>
+
+ <sect1>
+ <title>Functions</title>
+ <para>
+ The functions here are split into two groups those that
+ affect a journal as a whole, and those which are used to
+ manage transactions
+ </para>
+ <sect2><title>Journal Level</title>
+!Efs/jbd/journal.c
+!Ifs/jbd/recovery.c
+ </sect2>
+ <sect2><title>Transasction Level</title>
+!Efs/jbd/transaction.c
+ </sect2>
+ </sect1>
+ <sect1>
+ <title>See also</title>
+ <para>
+ <citation>
+ <ulink url="ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz">
+ Journaling the Linux ext2fs Filesystem, LinuxExpo 98, Stephen Tweedie
+ </ulink>
+ </citation>
+ </para>
+ <para>
+ <citation>
+ <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html">
+ Ext3 Journalling FileSystem, OLS 2000, Dr. Stephen Tweedie
+ </ulink>
+ </citation>
+ </para>
+ </sect1>
+
+ </chapter>
+
</book>
diff --git a/Documentation/DocBook/journal-api.tmpl b/Documentation/DocBook/journal-api.tmpl
deleted file mode 100644
index 2077f9a28c1..00000000000
--- a/Documentation/DocBook/journal-api.tmpl
+++ /dev/null
@@ -1,333 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
- "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
-
-<book id="LinuxJBDAPI">
- <bookinfo>
- <title>The Linux Journalling API</title>
- <authorgroup>
- <author>
- <firstname>Roger</firstname>
- <surname>Gammans</surname>
- <affiliation>
- <address>
- <email>rgammans@computer-surgery.co.uk</email>
- </address>
- </affiliation>
- </author>
- </authorgroup>
-
- <authorgroup>
- <author>
- <firstname>Stephen</firstname>
- <surname>Tweedie</surname>
- <affiliation>
- <address>
- <email>sct@redhat.com</email>
- </address>
- </affiliation>
- </author>
- </authorgroup>
-
- <copyright>
- <year>2002</year>
- <holder>Roger Gammans</holder>
- </copyright>
-
-<legalnotice>
- <para>
- This documentation is free software; you can redistribute
- it and/or modify it under the terms of the GNU General Public
- License as published by the Free Software Foundation; either
- version 2 of the License, or (at your option) any later
- version.
- </para>
-
- <para>
- This program is distributed in the hope that it will be
- useful, but WITHOUT ANY WARRANTY; without even the implied
- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- See the GNU General Public License for more details.
- </para>
-
- <para>
- You should have received a copy of the GNU General Public
- License along with this program; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- MA 02111-1307 USA
- </para>
-
- <para>
- For more details see the file COPYING in the source
- distribution of Linux.
- </para>
- </legalnotice>
- </bookinfo>
-
-<toc></toc>
-
- <chapter id="Overview">
- <title>Overview</title>
- <sect1>
- <title>Details</title>
-<para>
-The journalling layer is easy to use. You need to
-first of all create a journal_t data structure. There are
-two calls to do this dependent on how you decide to allocate the physical
-media on which the journal resides. The journal_init_inode() call
-is for journals stored in filesystem inodes, or the journal_init_dev()
-call can be use for journal stored on a raw device (in a continuous range
-of blocks). A journal_t is a typedef for a struct pointer, so when
-you are finally finished make sure you call journal_destroy() on it
-to free up any used kernel memory.
-</para>
-
-<para>
-Once you have got your journal_t object you need to 'mount' or load the journal
-file, unless of course you haven't initialised it yet - in which case you
-need to call journal_create().
-</para>
-
-<para>
-Most of the time however your journal file will already have been created, but
-before you load it you must call journal_wipe() to empty the journal file.
-Hang on, you say , what if the filesystem wasn't cleanly umount()'d . Well, it is the
-job of the client file system to detect this and skip the call to journal_wipe().
-</para>
-
-<para>
-In either case the next call should be to journal_load() which prepares the
-journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery()
-for you if it detects any outstanding transactions in the journal and similarly
-journal_load() will call journal_recover() if necessary.
-I would advise reading fs/ext3/super.c for examples on this stage.
-[RGG: Why is the journal_wipe() call necessary - doesn't this needlessly
-complicate the API. Or isn't a good idea for the journal layer to hide
-dirty mounts from the client fs]
-</para>
-
-<para>
-Now you can go ahead and start modifying the underlying
-filesystem. Almost.
-</para>
-
-
-<para>
-
-You still need to actually journal your filesystem changes, this
-is done by wrapping them into transactions. Additionally you
-also need to wrap the modification of each of the buffers
-with calls to the journal layer, so it knows what the modifications
-you are actually making are. To do this use journal_start() which
-returns a transaction handle.
-</para>
-
-<para>
-journal_start()
-and its counterpart journal_stop(), which indicates the end of a transaction
-are nestable calls, so you can reenter a transaction if necessary,
-but remember you must call journal_stop() the same number of times as
-journal_start() before the transaction is completed (or more accurately
-leaves the update phase). Ext3/VFS makes use of this feature to simplify
-quota support.
-</para>
-
-<para>
-Inside each transaction you need to wrap the modifications to the
-individual buffers (blocks). Before you start to modify a buffer you
-need to call journal_get_{create,write,undo}_access() as appropriate,
-this allows the journalling layer to copy the unmodified data if it
-needs to. After all the buffer may be part of a previously uncommitted
-transaction.
-At this point you are at last ready to modify a buffer, and once
-you are have done so you need to call journal_dirty_{meta,}data().
-Or if you've asked for access to a buffer you now know is now longer
-required to be pushed back on the device you can call journal_forget()
-in much the same way as you might have used bforget() in the past.
-</para>
-
-<para>
-A journal_flush() may be called at any time to commit and checkpoint
-all your transactions.
-</para>
-
-<para>
-Then at umount time , in your put_super() (2.4) or write_super() (2.5)
-you can then call journal_destroy() to clean up your in-core journal object.
-</para>
-
-
-<para>
-Unfortunately there a couple of ways the journal layer can cause a deadlock.
-The first thing to note is that each task can only have
-a single outstanding transaction at any one time, remember nothing
-commits until the outermost journal_stop(). This means
-you must complete the transaction at the end of each file/inode/address
-etc. operation you perform, so that the journalling system isn't re-entered
-on another journal. Since transactions can't be nested/batched
-across differing journals, and another filesystem other than
-yours (say ext3) may be modified in a later syscall.
-</para>
-
-<para>
-The second case to bear in mind is that journal_start() can
-block if there isn't enough space in the journal for your transaction
-(based on the passed nblocks param) - when it blocks it merely(!) needs to
-wait for transactions to complete and be committed from other tasks,
-so essentially we are waiting for journal_stop(). So to avoid
-deadlocks you must treat journal_start/stop() as if they
-were semaphores and include them in your semaphore ordering rules to prevent
-deadlocks. Note that journal_extend() has similar blocking behaviour to
-journal_start() so you can deadlock here just as easily as on journal_start().
-</para>
-
-<para>
-Try to reserve the right number of blocks the first time. ;-). This will
-be the maximum number of blocks you are going to touch in this transaction.
-I advise having a look at at least ext3_jbd.h to see the basis on which
-ext3 uses to make these decisions.
-</para>
-
-<para>
-Another wriggle to watch out for is your on-disk block allocation strategy.
-why? Because, if you undo a delete, you need to ensure you haven't reused any
-of the freed blocks in a later transaction. One simple way of doing this
-is make sure any blocks you allocate only have checkpointed transactions
-listed against them. Ext3 does this in ext3_test_allocatable().
-</para>
-
-<para>
-Lock is also providing through journal_{un,}lock_updates(),
-ext3 uses this when it wants a window with a clean and stable fs for a moment.
-eg.
-</para>
-
-<programlisting>
-
- journal_lock_updates() //stop new stuff happening..
- journal_flush() // checkpoint everything.
- ..do stuff on stable fs
- journal_unlock_updates() // carry on with filesystem use.
-</programlisting>
-
-<para>
-The opportunities for abuse and DOS attacks with this should be obvious,
-if you allow unprivileged userspace to trigger codepaths containing these
-calls.
-</para>
-
-<para>
-A new feature of jbd since 2.5.25 is commit callbacks with the new
-journal_callback_set() function you can now ask the journalling layer
-to call you back when the transaction is finally committed to disk, so that
-you can do some of your own management. The key to this is the journal_callback
-struct, this maintains the internal callback information but you can
-extend it like this:-
-</para>
-<programlisting>
- struct myfs_callback_s {
- //Data structure element required by jbd..
- struct journal_callback for_jbd;
- // Stuff for myfs allocated together.
- myfs_inode* i_commited;
-
- }
-</programlisting>
-
-<para>
-this would be useful if you needed to know when data was committed to a
-particular inode.
-</para>
-
-</sect1>
-
-<sect1>
-<title>Summary</title>
-<para>
-Using the journal is a matter of wrapping the different context changes,
-being each mount, each modification (transaction) and each changed buffer
-to tell the journalling layer about them.
-</para>
-
-<para>
-Here is a some pseudo code to give you an idea of how it works, as
-an example.
-</para>
-
-<programlisting>
- journal_t* my_jnrl = journal_create();
- journal_init_{dev,inode}(jnrl,...)
- if (clean) journal_wipe();
- journal_load();
-
- foreach(transaction) { /*transactions must be
- completed before
- a syscall returns to
- userspace*/
-
- handle_t * xct=journal_start(my_jnrl);
- foreach(bh) {
- journal_get_{create,write,undo}_access(xact,bh);
- if ( myfs_modify(bh) ) { /* returns true
- if makes changes */
- journal_dirty_{meta,}data(xact,bh);
- } else {
- journal_forget(bh);
- }
- }
- journal_stop(xct);
- }
- journal_destroy(my_jrnl);
-</programlisting>
-</sect1>
-
-</chapter>
-
- <chapter id="adt">
- <title>Data Types</title>
- <para>
- The journalling layer uses typedefs to 'hide' the concrete definitions
- of the structures used. As a client of the JBD layer you can
- just rely on the using the pointer as a magic cookie of some sort.
-
- Obviously the hiding is not enforced as this is 'C'.
- </para>
- <sect1><title>Structures</title>
-!Iinclude/linux/jbd.h
- </sect1>
-</chapter>
-
- <chapter id="calls">
- <title>Functions</title>
- <para>
- The functions here are split into two groups those that
- affect a journal as a whole, and those which are used to
- manage transactions
-</para>
- <sect1><title>Journal Level</title>
-!Efs/jbd/journal.c
-!Ifs/jbd/recovery.c
- </sect1>
- <sect1><title>Transasction Level</title>
-!Efs/jbd/transaction.c
- </sect1>
-</chapter>
-<chapter>
- <title>See also</title>
- <para>
- <citation>
- <ulink url="ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz">
- Journaling the Linux ext2fs Filesystem,LinuxExpo 98, Stephen Tweedie
- </ulink>
- </citation>
- </para>
- <para>
- <citation>
- <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html">
- Ext3 Journalling FileSystem , OLS 2000, Dr. Stephen Tweedie
- </ulink>
- </citation>
- </para>
-</chapter>
-
-</book>
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index b11792abd6b..bf2b0e2f87e 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -49,7 +49,7 @@ __u64 stime, utime;
}
/* Maximum size of response requested or message sent */
-#define MAX_MSG_SIZE 256
+#define MAX_MSG_SIZE 1024
/* Maximum number of cpus expected to be specified in a cpumask */
#define MAX_CPUS 32
/* Maximum length of pathname to log file */
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
index c65233d430f..284e7e198e9 100644
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -17,7 +17,7 @@ are:
special place-holders for where the extracted documentation should
go.
-- scripts/docproc.c
+- scripts/basic/docproc.c
This is a program for converting SGML template files into SGML
files. When a file is referenced it is searched for symbols
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index a66bec222b1..74311d7e0f3 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -30,6 +30,17 @@ testing). The system will support either 'firmware' or 'platform', and
that is known a priori. But, the user may choose 'shutdown' or
'reboot' as alternatives.
+Additionally, /sys/power/disk can be used to turn on one of the two testing
+modes of the suspend-to-disk mechanism: 'testproc' or 'test'. If the
+suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to
+/sys/power/state will cause the kernel to disable nonboot CPUs and freeze
+tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs. If it is
+in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel
+to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait
+for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then,
+we are able to look in the log messages and work out, for example, which code
+is being slow and which device drivers are misbehaving.
+
Reading from this file will display what the mode is currently set
to. Writing to this file will accept one of
@@ -37,6 +48,8 @@ to. Writing to this file will accept one of
'platform'
'shutdown'
'reboot'
+ 'testproc'
+ 'test'
It will only change to 'firmware' or 'platform' if the system supports
it.
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index ab974ff9707..22e4c466e5a 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -70,7 +70,7 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return
#define PREFIX "ACPI: "
-int acpi_noirq __initdata; /* skip ACPI IRQ initialization */
+int acpi_noirq; /* skip ACPI IRQ initialization */
int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
int acpi_ht __initdata = 1; /* enable HT */
diff --git a/arch/um/include/sysdep-i386/barrier.h b/arch/um/include/sysdep-i386/barrier.h
new file mode 100644
index 00000000000..b58d52c5b2f
--- /dev/null
+++ b/arch/um/include/sysdep-i386/barrier.h
@@ -0,0 +1,9 @@
+#ifndef __SYSDEP_I386_BARRIER_H
+#define __SYSDEP_I386_BARRIER_H
+
+/* Copied from include/asm-i386 for use by userspace. i386 has the option
+ * of using mfence, but I'm just using this, which works everywhere, for now.
+ */
+#define mb() asm volatile("lock; addl $0,0(%esp)")
+
+#endif
diff --git a/arch/um/include/sysdep-x86_64/barrier.h b/arch/um/include/sysdep-x86_64/barrier.h
new file mode 100644
index 00000000000..7b610befdc8
--- /dev/null
+++ b/arch/um/include/sysdep-x86_64/barrier.h
@@ -0,0 +1,7 @@
+#ifndef __SYSDEP_X86_64_BARRIER_H
+#define __SYSDEP_X86_64_BARRIER_H
+
+/* Copied from include/asm-x86_64 for use by userspace. */
+#define mb() asm volatile("mfence":::"memory")
+
+#endif
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 51f0893640a..c692a192957 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -7,7 +7,6 @@
#include <stdio.h>
#include <errno.h>
#include <signal.h>
-#include <linux/unistd.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/mman.h>
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 6b81739279d..b897e8592d7 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -15,6 +15,7 @@
#include "user.h"
#include "signal_kern.h"
#include "sysdep/sigcontext.h"
+#include "sysdep/barrier.h"
#include "sigcontext.h"
#include "mode.h"
#include "os.h"
@@ -34,8 +35,12 @@
#define SIGALRM_BIT 2
#define SIGALRM_MASK (1 << SIGALRM_BIT)
-static int signals_enabled = 1;
-static int pending = 0;
+/* These are used by both the signal handlers and
+ * block/unblock_signals. I don't want modifications cached in a
+ * register - they must go straight to memory.
+ */
+static volatile int signals_enabled = 1;
+static volatile int pending = 0;
void sig_handler(int sig, struct sigcontext *sc)
{
@@ -152,6 +157,12 @@ int change_sig(int signal, int on)
void block_signals(void)
{
signals_enabled = 0;
+ /* This must return with signals disabled, so this barrier
+ * ensures that writes are flushed out before the return.
+ * This might matter if gcc figures out how to inline this and
+ * decides to shuffle this code into the caller.
+ */
+ mb();
}
void unblock_signals(void)
@@ -171,9 +182,23 @@ void unblock_signals(void)
*/
signals_enabled = 1;
+ /* Setting signals_enabled and reading pending must
+ * happen in this order.
+ */
+ mb();
+
save_pending = pending;
- if(save_pending == 0)
+ if(save_pending == 0){
+ /* This must return with signals enabled, so
+ * this barrier ensures that writes are
+ * flushed out before the return. This might
+ * matter if gcc figures out how to inline
+ * this (unlikely, given its size) and decides
+ * to shuffle this code into the caller.
+ */
+ mb();
return;
+ }
pending = 0;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index cb9ab54146c..9b34fe65949 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -14,7 +14,7 @@
#include <sys/mman.h>
#include <sys/user.h>
#include <sys/time.h>
-#include <asm/unistd.h>
+#include <sys/syscall.h>
#include <asm/types.h>
#include "user.h"
#include "sysdep/ptrace.h"
diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c
index 9f7999f27c7..16215b99080 100644
--- a/arch/um/os-Linux/tls.c
+++ b/arch/um/os-Linux/tls.c
@@ -1,7 +1,7 @@
#include <errno.h>
+#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/syscall.h>
-#include <unistd.h>
#include <asm/ldt.h>
#include "sysdep/tls.h"
#include "uml-config.h"
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c7b1dac8bee..9eaee664053 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3075,11 +3075,12 @@ end_io:
if (maxsector) {
sector_t sector = bio->bi_sector;
- if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
+ if (maxsector < nr_sectors ||
+ maxsector - nr_sectors < sector) {
/*
- * This may well happen - partitions are not checked
- * to make sure they are within the size of the
- * whole device.
+ * This may well happen - partitions are not
+ * checked to make sure they are within the size
+ * of the whole device.
*/
handle_bad_sector(bio);
goto end_io;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index e5cfb1fa47d..157fa81a264 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1867,7 +1867,7 @@ static int ipmi_pci_resume(struct pci_dev *pdev)
static struct pci_device_id ipmi_pci_devices[] = {
{ PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) },
- { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE) }
+ { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) }
};
MODULE_DEVICE_TABLE(pci, ipmi_pci_devices);
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 4bde30bb3be..75e9e38330f 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -230,34 +230,43 @@ static struct kobj_type ktype_memctrl = {
*/
static int edac_sysfs_memctrl_setup(void)
{
- int err=0;
+ int err = 0;
debugf1("%s()\n", __func__);
/* create the /sys/devices/system/edac directory */
err = sysdev_class_register(&edac_class);
- if (!err) {
- /* Init the MC's kobject */
- memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
- edac_memctrl_kobj.parent = &edac_class.kset.kobj;
- edac_memctrl_kobj.ktype = &ktype_memctrl;
+ if (err) {
+ debugf1("%s() error=%d\n", __func__, err);
+ return err;
+ }
- /* generate sysfs "..../edac/mc" */
- err = kobject_set_name(&edac_memctrl_kobj,"mc");
+ /* Init the MC's kobject */
+ memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
+ edac_memctrl_kobj.parent = &edac_class.kset.kobj;
+ edac_memctrl_kobj.ktype = &ktype_memctrl;
- if (!err) {
- /* FIXME: maybe new sysdev_create_subdir() */
- err = kobject_register(&edac_memctrl_kobj);
+ /* generate sysfs "..../edac/mc" */
+ err = kobject_set_name(&edac_memctrl_kobj,"mc");
- if (err)
- debugf1("Failed to register '.../edac/mc'\n");
- else
- debugf1("Registered '.../edac/mc' kobject\n");
- }
- } else
- debugf1("%s() error=%d\n", __func__, err);
+ if (err)
+ goto fail;
+
+ /* FIXME: maybe new sysdev_create_subdir() */
+ err = kobject_register(&edac_memctrl_kobj);
+
+ if (err) {
+ debugf1("Failed to register '.../edac/mc'\n");
+ goto fail;
+ }
+ debugf1("Registered '.../edac/mc' kobject\n");
+
+ return 0;
+
+fail:
+ sysdev_class_unregister(&edac_class);
return err;
}
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 2b0ea8b6608..753fe0e2145 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -75,6 +75,7 @@ static struct amd_ide_chip {
{ PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, AMD_UDMA_133 },
{ PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, 0x50, AMD_UDMA_133 },
{ PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, 0x50, AMD_UDMA_133 },
+ { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, 0x50, AMD_UDMA_133 },
{ PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, AMD_UDMA_100 },
{ 0 }
};
@@ -491,7 +492,8 @@ static ide_pci_device_t amd74xx_chipsets[] __devinitdata = {
/* 16 */ DECLARE_NV_DEV("NFORCE-MCP55"),
/* 17 */ DECLARE_NV_DEV("NFORCE-MCP61"),
/* 18 */ DECLARE_NV_DEV("NFORCE-MCP65"),
- /* 19 */ DECLARE_AMD_DEV("AMD5536"),
+ /* 19 */ DECLARE_NV_DEV("NFORCE-MCP67"),
+ /* 20 */ DECLARE_AMD_DEV("AMD5536"),
};
static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id)
@@ -530,7 +532,8 @@ static struct pci_device_id amd74xx_pci_tbl[] = {
{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 16 },
{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 17 },
{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 18 },
- { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 19 },
+ { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 19 },
+ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 20 },
{ 0, },
};
MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl);
diff --git a/drivers/isdn/hysdn/hysdn_sched.c b/drivers/isdn/hysdn/hysdn_sched.c
index 1fadf0133e9..18758772b74 100644
--- a/drivers/isdn/hysdn/hysdn_sched.c
+++ b/drivers/isdn/hysdn/hysdn_sched.c
@@ -155,21 +155,17 @@ hysdn_tx_cfgline(hysdn_card *card, unsigned char *line, unsigned short chan)
if (card->debug_flags & LOG_SCHED_ASYN)
hysdn_addlog(card, "async tx-cfg chan=%d len=%d", chan, strlen(line) + 1);
- spin_lock_irqsave(&card->hysdn_lock, flags);
while (card->async_busy) {
- sti();
if (card->debug_flags & LOG_SCHED_ASYN)
hysdn_addlog(card, "async tx-cfg delayed");
msleep_interruptible(20); /* Timeout 20ms */
- if (!--cnt) {
- spin_unlock_irqrestore(&card->hysdn_lock, flags);
+ if (!--cnt)
return (-ERR_ASYNC_TIME); /* timed out */
- }
- cli();
} /* wait for buffer to become free */
+ spin_lock_irqsave(&card->hysdn_lock, flags);
strcpy(card->async_data, line);
card->async_len = strlen(line) + 1;
card->async_channel = chan;
@@ -177,30 +173,23 @@ hysdn_tx_cfgline(hysdn_card *card, unsigned char *line, unsigned short chan)
/* now queue the task */
schedule_work(&card->irq_queue);
- sti();
+ spin_unlock_irqrestore(&card->hysdn_lock, flags);
if (card->debug_flags & LOG_SCHED_ASYN)
hysdn_addlog(card, "async tx-cfg data queued");
cnt++; /* short delay */
- cli();
while (card->async_busy) {
- sti();
if (card->debug_flags & LOG_SCHED_ASYN)
hysdn_addlog(card, "async tx-cfg waiting for tx-ready");
msleep_interruptible(20); /* Timeout 20ms */
- if (!--cnt) {
- spin_unlock_irqrestore(&card->hysdn_lock, flags);
+ if (!--cnt)
return (-ERR_ASYNC_TIME); /* timed out */
- }
- cli();
} /* wait for buffer to become free again */
- spin_unlock_irqrestore(&card->hysdn_lock, flags);
-
if (card->debug_flags & LOG_SCHED_ASYN)
hysdn_addlog(card, "async tx-cfg data send");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 50ab4a936e3..d1113560440 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3200,6 +3200,7 @@ static int do_md_run(mddev_t * mddev)
mddev->changed = 1;
md_new_event(mddev);
+ kobject_uevent(&mddev->gendisk->kobj, KOBJ_ONLINE);
return 0;
}
@@ -3313,6 +3314,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
module_put(mddev->pers->owner);
mddev->pers = NULL;
+ kobject_uevent(&mddev->gendisk->kobj, KOBJ_OFFLINE);
if (mddev->ro)
mddev->ro = 0;
}
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index bbdba7b37e1..46a9c35943b 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -44,12 +44,14 @@
*/
#include <linux/kernel.h>
+#include <linux/fs.h>
#include <linux/module.h>
+#include <linux/buffer_head.h>
#include <linux/kprobes.h>
-#include <linux/kallsyms.h>
+#include <linux/list.h>
#include <linux/init.h>
-#include <linux/irq.h>
#include <linux/interrupt.h>
+#include <linux/hrtimer.h>
#include <scsi/scsi_cmnd.h>
#ifdef CONFIG_IDE
@@ -116,16 +118,16 @@ static enum ctype cptype = NONE;
static int count = DEFAULT_COUNT;
module_param(recur_count, int, 0644);
-MODULE_PARM_DESC(recur_count, "Recurcion level for the stack overflow test,\
- default is 10");
+MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\
+ "default is 10");
module_param(cpoint_name, charp, 0644);
-MODULE_PARM_DESC(cpoint_name, "Crash Point, where kernel is to be crashed");
-module_param(cpoint_type, charp, 06444);
-MODULE_PARM_DESC(cpoint_type, "Crash Point Type, action to be taken on\
- hitting the crash point");
-module_param(cpoint_count, int, 06444);
-MODULE_PARM_DESC(cpoint_count, "Crash Point Count, number of times the \
- crash point is to be hit to trigger action");
+MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed");
+module_param(cpoint_type, charp, 0644);
+MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\
+ "hitting the crash point");
+module_param(cpoint_count, int, 0644);
+MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
+ "crash point is to be hit to trigger action");
unsigned int jp_do_irq(unsigned int irq)
{
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 146298ad737..c3c0626f550 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -281,7 +281,6 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
up(&board_lock);
return 0;
}
-EXPORT_SYMBOL_GPL(spi_register_board_info);
/* FIXME someone should add support for a __setup("spi", ...) that
* creates board info from kernel command lines
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 976a691c5a6..7e056b9b49e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1806,13 +1806,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
}
if ((rc < 0) || (smb_read_data == NULL)) {
cFYI(1, ("Read error in readpages: %d", rc));
- /* clean up remaing pages off list */
- while (!list_empty(page_list) && (i < num_pages)) {
- page = list_entry(page_list->prev, struct page,
- lru);
- list_del(&page->lru);
- page_cache_release(page);
- }
break;
} else if (bytes_read > 0) {
pSMBr = (struct smb_com_read_rsp *)smb_read_data;
@@ -1831,13 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
this case is ok - if we are at server EOF
we will hit it on next read */
- /* while (!list_empty(page_list) && (i < num_pages)) {
- page = list_entry(page_list->prev,
- struct page, list);
- list_del(&page->list);
- page_cache_release(page);
- }
- break; */
+ /* break; */
}
} else {
cFYI(1, ("No bytes read (%d) at offset %lld . "
@@ -1845,14 +1832,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
bytes_read, offset));
/* BB turn off caching and do new lookup on
file size at server? */
- while (!list_empty(page_list) && (i < num_pages)) {
- page = list_entry(page_list->prev, struct page,
- lru);
- list_del(&page->lru);
-
- /* BB removeme - replace with zero of page? */
- page_cache_release(page);
- }
break;
}
if (smb_read_data) {
diff --git a/fs/compat.c b/fs/compat.c
index 50624d4a70c..8d0a0018a7d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1835,9 +1835,12 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
} while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
- if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) {
+ if (tsp) {
struct compat_timespec rts;
+ if (current->personality & STICKY_TIMEOUTS)
+ goto sticky;
+
rts.tv_sec = timeout / HZ;
rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
if (rts.tv_nsec >= NSEC_PER_SEC) {
@@ -1846,8 +1849,19 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
}
if (compat_timespec_compare(&rts, &ts) >= 0)
rts = ts;
- if (copy_to_user(tsp, &rts, sizeof(rts)))
- ret = -EFAULT;
+ if (copy_to_user(tsp, &rts, sizeof(rts))) {
+sticky:
+ /*
+ * If an application puts its timeval in read-only
+ * memory, we don't want the Linux-specific update to
+ * the timeval to cause a fault after the select has
+ * completed successfully. However, because we're not
+ * updating the timeval, we can't restart the system
+ * call.
+ */
+ if (ret == -ERESTARTNOHAND)
+ ret = -EINTR;
+ }
}
if (ret == -ERESTARTNOHAND) {
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f49f105394b..136175a6933 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -134,7 +134,7 @@ int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
algified_name_len = (chaining_modifier_len + cipher_name_len + 3);
(*algified_name) = kmalloc(algified_name_len, GFP_KERNEL);
- if (!(algified_name)) {
+ if (!(*algified_name)) {
rc = -ENOMEM;
goto out;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bb5ace3882..763a50daf1c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -397,14 +397,14 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
err = -EIO;
if (is_bad_inode(inode))
- goto clean_pages_up;
+ goto out;
data.file = file;
data.inode = inode;
data.req = fuse_get_req(fc);
err = PTR_ERR(data.req);
if (IS_ERR(data.req))
- goto clean_pages_up;
+ goto out;
err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
if (!err) {
@@ -413,10 +413,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
else
fuse_put_request(fc, data.req);
}
- return err;
-
-clean_pages_up:
- put_pages_list(pages);
+out:
return err;
}
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 8d5963c7e12..015640b3f12 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -337,13 +337,6 @@ out:
out_noerror:
ret = 0;
out_unlock:
- /* unlock all pages, we can't do any I/O right now */
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
- list_del(&page->lru);
- unlock_page(page);
- page_cache_release(page);
- }
if (do_unlock)
gfs2_holder_uninit(&gh);
goto out;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e9d07704680..81b8565d383 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
* any regular files anyway, just in case the directory was created by
* a kernel from the future.... */
nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
- mutex_lock(&dir->d_inode->i_mutex);
+ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
status = vfs_rmdir(dir->d_inode, dentry);
mutex_unlock(&dir->d_inode->i_mutex);
return status;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9041802df83..17249994110 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1619,6 +1619,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
"jmacd-8: reiserfs_fill_super: unable to read bitmap");
goto error;
}
+ errval = -EINVAL;
#ifdef CONFIG_REISERFS_CHECK
SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON");
SWARN(silent, s, "- it is slow mode for debugging.");
diff --git a/fs/xattr.c b/fs/xattr.c
index 395635100f7..0901bdc2ce2 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -48,14 +48,21 @@ xattr_permission(struct inode *inode, const char *name, int mask)
return 0;
/*
- * The trusted.* namespace can only accessed by a privilegued user.
+ * The trusted.* namespace can only be accessed by a privileged user.
*/
if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
+ /* In user.* namespace, only regular files and directories can have
+ * extended attributes. For sticky directories, only the owner and
+ * privileged user can write attributes.
+ */
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
- if (!S_ISREG(inode->i_mode) &&
- (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
+ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ return -EPERM;
+ if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
+ (mask & MAY_WRITE) && (current->fsuid != inode->i_uid) &&
+ !capable(CAP_FOWNER))
return -EPERM;
}
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index eac85ce101b..c6a03187f93 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -261,7 +261,7 @@ SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
PPC_SYS_SPU(rtas)
OLDSYS(debug_setcontext)
SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
+COMPAT_SYS(migrate_pages)
COMPAT_SYS(mbind)
COMPAT_SYS(get_mempolicy)
COMPAT_SYS(set_mempolicy)
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index 464a48cce7f..b5fe93291c9 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -276,7 +276,7 @@
#define __NR_rtas 255
#define __NR_sys_debug_setcontext 256
/* Number 257 is reserved for vserver */
-/* 258 currently unused */
+#define __NR_migrate_pages 258
#define __NR_mbind 259
#define __NR_get_mempolicy 260
#define __NR_set_mempolicy 261
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f1553196826..80b17f440ec 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -230,5 +230,9 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
extern int compat_printk(const char *fmt, ...);
extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
+ compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
+ const compat_ulong_t __user *new_nodes);
+
#endif /* CONFIG_COMPAT */
#endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 80f39cab470..24b611147ad 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -171,6 +171,8 @@ __attribute_const__ roundup_pow_of_two(unsigned long x)
extern int printk_ratelimit(void);
extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
+extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+ unsigned int interval_msec);
static inline void console_silent(void)
{
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6b27e07aef1..070394e846d 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -116,7 +116,9 @@ typedef int __bitwise suspend_disk_method_t;
#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2)
#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3)
#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4)
-#define PM_DISK_MAX ((__force suspend_disk_method_t) 5)
+#define PM_DISK_TEST ((__force suspend_disk_method_t) 5)
+#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 6)
+#define PM_DISK_MAX ((__force suspend_disk_method_t) 7)
struct pm_ops {
suspend_disk_method_t pm_disk_mode;
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index 61eef508b04..28967eda9d7 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -908,7 +908,7 @@ struct ufs_super_block_third {
__fs64 fs_csaddr; /* blk addr of cyl grp summary area */
__fs64 fs_pendingblocks;/* blocks in process of being freed */
__fs32 fs_pendinginodes;/*inodes in process of being freed */
- } fs_u2;
+ } __attribute__ ((packed)) fs_u2;
} fs_un1;
union {
struct {
diff --git a/ipc/msg.c b/ipc/msg.c
index 5b213d95254..600e06f943a 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -124,6 +124,7 @@ void msg_exit_ns(struct ipc_namespace *ns)
}
mutex_unlock(&msg_ids(ns).mutex);
+ ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
kfree(ns->ids[IPC_MSG_IDS]);
ns->ids[IPC_MSG_IDS] = NULL;
}
diff --git a/ipc/sem.c b/ipc/sem.c
index 0dafcc455f9..21b3289d640 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -161,6 +161,7 @@ void sem_exit_ns(struct ipc_namespace *ns)
}
mutex_unlock(&sem_ids(ns).mutex);
+ ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
kfree(ns->ids[IPC_SEM_IDS]);
ns->ids[IPC_SEM_IDS] = NULL;
}
diff --git a/ipc/shm.c b/ipc/shm.c
index bfbd317ec11..d1198dd07a1 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -116,6 +116,7 @@ void shm_exit_ns(struct ipc_namespace *ns)
}
mutex_unlock(&shm_ids(ns).mutex);
+ ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
kfree(ns->ids[IPC_SHM_IDS]);
ns->ids[IPC_SHM_IDS] = NULL;
}
diff --git a/ipc/util.c b/ipc/util.c
index 42479e4eec5..cd8bb14a431 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -301,7 +301,7 @@ static int grow_ary(struct ipc_ids* ids, int newsize)
*/
rcu_assign_pointer(ids->entries, new);
- ipc_rcu_putref(old);
+ __ipc_fini_ids(ids, old);
return newsize;
}
diff --git a/ipc/util.h b/ipc/util.h
index c8fd6b9d77b..e3aa2c5c97d 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -83,6 +83,18 @@ void* ipc_rcu_alloc(int size);
void ipc_rcu_getref(void *ptr);
void ipc_rcu_putref(void *ptr);
+static inline void __ipc_fini_ids(struct ipc_ids *ids,
+ struct ipc_id_ary *entries)
+{
+ if (entries != &ids->nullentry)
+ ipc_rcu_putref(entries);
+}
+
+static inline void ipc_fini_ids(struct ipc_ids *ids)
+{
+ __ipc_fini_ids(ids, ids->entries);
+}
+
struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
diff --git a/kernel/compat.c b/kernel/compat.c
index d4898aad6cf..6952dd05730 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -982,4 +982,37 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
}
return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
}
+
+asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
+ compat_ulong_t maxnode,
+ const compat_ulong_t __user *old_nodes,
+ const compat_ulong_t __user *new_nodes)
+{
+ unsigned long __user *old = NULL;
+ unsigned long __user *new = NULL;
+ nodemask_t tmp_mask;
+ unsigned long nr_bits;
+ unsigned long size;
+
+ nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES);
+ size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+ if (old_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits))
+ return -EFAULT;
+ old = compat_alloc_user_space(new_nodes ? size * 2 : size);
+ if (new_nodes)
+ new = old + size / sizeof(unsigned long);
+ if (copy_to_user(old, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ if (new_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits))
+ return -EFAULT;
+ if (new == NULL)
+ new = compat_alloc_user_space(size);
+ if (copy_to_user(new, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ return sys_migrate_pages(pid, nr_bits + 1, old, new);
+}
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index b364e002619..93ef30ba209 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1507,6 +1507,13 @@ static int futex_fd(u32 __user *uaddr, int signal)
struct futex_q *q;
struct file *filp;
int ret, err;
+ static unsigned long printk_interval;
+
+ if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
+ printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
+ "will be removed from the kernel in June 2007\n",
+ current->comm);
+ }
ret = -EINVAL;
if (!valid_signal(signal))
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d3a158a6031..b1fb7866b0b 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -71,7 +71,7 @@ static inline void platform_finish(void)
static int prepare_processes(void)
{
- int error;
+ int error = 0;
pm_prepare_console();
@@ -84,6 +84,12 @@ static int prepare_processes(void)
goto thaw;
}
+ if (pm_disk_mode == PM_DISK_TESTPROC) {
+ printk("swsusp debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ goto thaw;
+ }
+
/* Free memory before shutting down devices. */
if (!(error = swsusp_shrink_memory()))
return 0;
@@ -120,13 +126,21 @@ int pm_suspend_disk(void)
if (error)
return error;
+ if (pm_disk_mode == PM_DISK_TESTPROC)
+ goto Thaw;
+
suspend_console();
error = device_suspend(PMSG_FREEZE);
if (error) {
resume_console();
printk("Some devices failed to suspend\n");
- unprepare_processes();
- return error;
+ goto Thaw;
+ }
+
+ if (pm_disk_mode == PM_DISK_TEST) {
+ printk("swsusp debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+ goto Done;
}
pr_debug("PM: snapshotting memory.\n");
@@ -143,16 +157,17 @@ int pm_suspend_disk(void)
power_down(pm_disk_mode);
else {
swsusp_free();
- unprepare_processes();
- return error;
+ goto Thaw;
}
- } else
+ } else {
pr_debug("PM: Image restored successfully.\n");
+ }
swsusp_free();
Done:
device_resume();
resume_console();
+ Thaw:
unprepare_processes();
return error;
}
@@ -249,6 +264,8 @@ static const char * const pm_disk_modes[] = {
[PM_DISK_PLATFORM] = "platform",
[PM_DISK_SHUTDOWN] = "shutdown",
[PM_DISK_REBOOT] = "reboot",
+ [PM_DISK_TEST] = "test",
+ [PM_DISK_TESTPROC] = "testproc",
};
/**
@@ -303,17 +320,19 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
}
}
if (mode) {
- if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT)
+ if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT ||
+ mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) {
pm_disk_mode = mode;
- else {
+ } else {
if (pm_ops && pm_ops->enter &&
(mode == pm_ops->pm_disk_mode))
pm_disk_mode = mode;
else
error = -EINVAL;
}
- } else
+ } else {
error = -EINVAL;
+ }
pr_debug("PM: suspend-to-disk mode set to '%s'\n",
pm_disk_modes[mode]);
diff --git a/kernel/printk.c b/kernel/printk.c
index f7d427ef503..66426552fbf 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
+#include <linux/jiffies.h>
#include <asm/uaccess.h>
@@ -1101,3 +1102,23 @@ int printk_ratelimit(void)
printk_ratelimit_burst);
}
EXPORT_SYMBOL(printk_ratelimit);
+
+/**
+ * printk_timed_ratelimit - caller-controlled printk ratelimiting
+ * @caller_jiffies: pointer to caller's state
+ * @interval_msecs: minimum interval between prints
+ *
+ * printk_timed_ratelimit() returns true if more than @interval_msecs
+ * milliseconds have elapsed since the last time printk_timed_ratelimit()
+ * returned true.
+ */
+bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+ unsigned int interval_msecs)
+{
+ if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) {
+ *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs);
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL(printk_timed_ratelimit);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0e53314b14d..d7306d0f3df 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -135,6 +135,7 @@ cond_syscall(sys_madvise);
cond_syscall(sys_mremap);
cond_syscall(sys_remap_file_pages);
cond_syscall(compat_sys_move_pages);
+cond_syscall(compat_sys_migrate_pages);
/* block-layer dependent */
cond_syscall(sys_bdflush);
diff --git a/mm/migrate.c b/mm/migrate.c
index ba2453f9483..b4979d423d2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -952,7 +952,8 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
goto out;
pm[i].node = node;
- }
+ } else
+ pm[i].node = 0; /* anything to not match MAX_NUMNODES */
}
/* End marker */
pm[nr_pages].node = MAX_NUMNODES;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b55bb358b83..bf2f6cff1d6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -853,7 +853,7 @@ again:
pcp = &zone_pcp(zone, cpu)->pcp[cold];
local_irq_save(flags);
if (!pcp->count) {
- pcp->count += rmqueue_bulk(zone, 0,
+ pcp->count = rmqueue_bulk(zone, 0,
pcp->batch, &pcp->list);
if (unlikely(!pcp->count))
goto failed;
diff --git a/mm/readahead.c b/mm/readahead.c
index 1ba736ac036..23cb61a01c6 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -173,6 +173,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
if (mapping->a_ops->readpages) {
ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
+ /* Clean up the remaining pages */
+ put_pages_list(pages);
goto out;
}
diff --git a/mm/slab.c b/mm/slab.c
index 84c631f3074..3c4a7e34edd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -883,7 +883,7 @@ static void init_reap_node(int cpu)
if (node == MAX_NUMNODES)
node = first_node(node_online_map);
- __get_cpu_var(reap_node) = node;
+ per_cpu(reap_node, cpu) = node;
}
static void next_reap_node(void)
diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c
index 4ab6cbf0922..d6071cbf13d 100644
--- a/scripts/basic/docproc.c
+++ b/scripts/basic/docproc.c
@@ -250,7 +250,7 @@ void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); }
void extfunc(char * filename) { docfunctions(filename, FUNCTION); }
/*
- * Document spåecific function(s) in a file.
+ * Document specific function(s) in a file.
* Call kernel-doc with the following parameters:
* kernel-doc -docbook -function function1 [-function function2]
*/