aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/Kconfig9
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--drivers/block/cciss.c22
-rw-r--r--drivers/char/moxa.c9
-rw-r--r--drivers/char/rio/host.h1
-rw-r--r--drivers/char/rio/rio_linux.c9
-rw-r--r--drivers/clocksource/acpi_pm.c6
-rw-r--r--drivers/ide/pci/generic.c13
-rw-r--r--drivers/md/bitmap.c10
-rw-r--r--drivers/md/md.c13
-rw-r--r--drivers/md/multipath.c2
-rw-r--r--drivers/md/raid10.c2
-rw-r--r--drivers/message/i2o/exec-osm.c2
-rw-r--r--drivers/net/ibmveth.c10
-rw-r--r--fs/dcache.c137
-rw-r--r--fs/inode.c36
-rw-r--r--fs/nfs/dir.c13
-rw-r--r--fs/splice.c105
-rw-r--r--include/linux/compat_ioctl.h1
-rw-r--r--include/linux/fs.h7
-rw-r--r--include/linux/mempolicy.h2
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--include/linux/pci.h13
-rw-r--r--include/linux/raid/bitmap.h20
-rw-r--r--include/linux/raid/md_p.h56
-rw-r--r--mm/filemap.c30
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/slab.c11
28 files changed, 402 insertions, 146 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8b691046557..2bd9b7fb0f6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -751,6 +751,15 @@ config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
+# Some NUMA nodes have memory ranges that span
+# other nodes. Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node. See memmap_init_zone()
+# for details.
+config NODES_SPAN_OTHER_NODES
+ def_bool y
+ depends on NEED_MULTIPLE_NODES
+
config PPC_64K_PAGES
bool "64k page size"
depends on PPC64
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 9828663652e..d2833c1a1f3 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -184,6 +184,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_MIGRATION=y
CONFIG_RESOURCES_64BIT=y
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
+CONFIG_NODES_SPAN_OTHER_NODES=y
# CONFIG_PPC_64K_PAGES is not set
CONFIG_SCHED_SMT=y
CONFIG_PROC_DEVICETREE=y
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index dcccaf2782f..bc6602606fb 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1923,7 +1923,6 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
{
int return_code;
unsigned long t;
- unsigned long rem;
memset(inq_buff, 0, sizeof(InquiryData_struct));
if (withirq)
@@ -1939,26 +1938,23 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
printk(KERN_WARNING
"cciss: reading geometry failed, volume "
"does not support reading geometry\n");
- drv->block_size = block_size;
- drv->nr_blocks = total_size;
drv->heads = 255;
drv->sectors = 32; // Sectors per track
- t = drv->heads * drv->sectors;
- drv->cylinders = total_size;
- rem = do_div(drv->cylinders, t);
} else {
- drv->block_size = block_size;
- drv->nr_blocks = total_size;
drv->heads = inq_buff->data_byte[6];
drv->sectors = inq_buff->data_byte[7];
drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
drv->cylinders += inq_buff->data_byte[5];
drv->raid_level = inq_buff->data_byte[8];
- t = drv->heads * drv->sectors;
- if (t > 1) {
- drv->cylinders = total_size;
- rem = do_div(drv->cylinders, t);
- }
+ }
+ drv->block_size = block_size;
+ drv->nr_blocks = total_size;
+ t = drv->heads * drv->sectors;
+ if (t > 1) {
+ unsigned rem = sector_div(total_size, t);
+ if (rem)
+ total_size++;
+ drv->cylinders = total_size;
}
} else { /* Get geometry failed */
printk(KERN_WARNING "cciss: reading geometry failed\n");
diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c
index b401383808c..96cb1f07332 100644
--- a/drivers/char/moxa.c
+++ b/drivers/char/moxa.c
@@ -130,6 +130,7 @@ static moxa_isa_board_conf moxa_isa_boards[] =
typedef struct _moxa_pci_devinfo {
ushort busNum;
ushort devNum;
+ struct pci_dev *pdev;
} moxa_pci_devinfo;
typedef struct _moxa_board_conf {
@@ -324,6 +325,9 @@ static int moxa_get_PCI_conf(struct pci_dev *p, int board_type, moxa_board_conf
board->busType = MOXA_BUS_TYPE_PCI;
board->pciInfo.busNum = p->bus->number;
board->pciInfo.devNum = p->devfn >> 3;
+ board->pciInfo.pdev = p;
+ /* don't lose the reference in the next pci_get_device iteration */
+ pci_dev_get(p);
return (0);
}
@@ -493,6 +497,11 @@ static void __exit moxa_exit(void)
if (tty_unregister_driver(moxaDriver))
printk("Couldn't unregister MOXA Intellio family serial driver\n");
put_tty_driver(moxaDriver);
+
+ for (i = 0; i < MAX_BOARDS; i++)
+ if (moxa_boards[i].busType == MOXA_BUS_TYPE_PCI)
+ pci_dev_put(moxa_boards[i].pciInfo.pdev);
+
if (verbose)
printk("Done\n");
}
diff --git a/drivers/char/rio/host.h b/drivers/char/rio/host.h
index ee2ddea7a63..23d0681fe49 100644
--- a/drivers/char/rio/host.h
+++ b/drivers/char/rio/host.h
@@ -44,6 +44,7 @@
** the host.
*/
struct Host {
+ struct pci_dev *pdev;
unsigned char Type; /* RIO_EISA, RIO_MCA, ... */
unsigned char Ivec; /* POLLED or ivec number */
unsigned char Mode; /* Control stuff */
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index c382df0f82f..7ac68cb3bed 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -1017,6 +1017,10 @@ static int __init rio_init(void)
rio_dprintk(RIO_DEBUG_PROBE, "Hmm Tested ok, uniqid = %x.\n", p->RIOHosts[p->RIONumHosts].UniqueNum);
fix_rio_pci(pdev);
+
+ p->RIOHosts[p->RIONumHosts].pdev = pdev;
+ pci_dev_get(pdev);
+
p->RIOLastPCISearch = 0;
p->RIONumHosts++;
found++;
@@ -1066,6 +1070,9 @@ static int __init rio_init(void)
((readb(&p->RIOHosts[p->RIONumHosts].Unique[1]) & 0xFF) << 8) | ((readb(&p->RIOHosts[p->RIONumHosts].Unique[2]) & 0xFF) << 16) | ((readb(&p->RIOHosts[p->RIONumHosts].Unique[3]) & 0xFF) << 24);
rio_dprintk(RIO_DEBUG_PROBE, "Hmm Tested ok, uniqid = %x.\n", p->RIOHosts[p->RIONumHosts].UniqueNum);
+ p->RIOHosts[p->RIONumHosts].pdev = pdev;
+ pci_dev_get(pdev);
+
p->RIOLastPCISearch = 0;
p->RIONumHosts++;
found++;
@@ -1181,6 +1188,8 @@ static void __exit rio_exit(void)
}
/* It is safe/allowed to del_timer a non-active timer */
del_timer(&hp->timer);
+ if (hp->Type == RIO_PCI)
+ pci_dev_put(hp->pdev);
}
if (misc_deregister(&rio_fw_device) < 0) {
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index 7ad3be8c0f4..7fcb77a9d01 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -54,8 +54,8 @@ static cycle_t acpi_pm_read_verified(void)
v1 = read_pmtmr();
v2 = read_pmtmr();
v3 = read_pmtmr();
- } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
- || (v3 > v1 && v3 < v2));
+ } while (unlikely((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+ || (v3 > v1 && v3 < v2)));
return (cycle_t)v2;
}
@@ -138,6 +138,8 @@ static void __devinit acpi_pm_check_graylist(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
acpi_pm_check_graylist);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE,
+ acpi_pm_check_graylist);
#endif
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index 5b77a5bcbf0..ad418ce882c 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -40,6 +40,19 @@
static int ide_generic_all; /* Set to claim all devices */
+/*
+ * the module_param_named() was added for the modular case
+ * the __setup() is left as compatibility for existing setups
+ */
+#ifndef MODULE
+static int __init ide_generic_all_on(char *unused)
+{
+ ide_generic_all = 1;
+ printk(KERN_INFO "IDE generic will claim all unknown PCI IDE storage controllers.");
+ return 1;
+}
+__setup("all-generic-ide", ide_generic_all_on);
+#endif
module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index d47d38ac71b..d6f614738bb 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -536,7 +536,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
"-- forcing full recovery\n", bmname(bitmap), events,
(unsigned long long) bitmap->mddev->events);
- sb->state |= BITMAP_STALE;
+ sb->state |= cpu_to_le32(BITMAP_STALE);
}
success:
/* assign fields using values from superblock */
@@ -544,11 +544,11 @@ success:
bitmap->daemon_sleep = daemon_sleep;
bitmap->daemon_lastrun = jiffies;
bitmap->max_write_behind = write_behind;
- bitmap->flags |= sb->state;
+ bitmap->flags |= le32_to_cpu(sb->state);
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
bitmap->flags |= BITMAP_HOSTENDIAN;
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
- if (sb->state & BITMAP_STALE)
+ if (sb->state & cpu_to_le32(BITMAP_STALE))
bitmap->events_cleared = bitmap->mddev->events;
err = 0;
out:
@@ -578,9 +578,9 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
spin_unlock_irqrestore(&bitmap->lock, flags);
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
switch (op) {
- case MASK_SET: sb->state |= bits;
+ case MASK_SET: sb->state |= cpu_to_le32(bits);
break;
- case MASK_UNSET: sb->state &= ~bits;
+ case MASK_UNSET: sb->state &= cpu_to_le32(~bits);
break;
default: BUG();
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f7f19088f3b..7daa7b1e145 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -974,12 +974,13 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
* version 1 superblock
*/
-static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
+static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
{
- unsigned int disk_csum, csum;
+ __le32 disk_csum;
+ u32 csum;
unsigned long long newcsum;
int size = 256 + le32_to_cpu(sb->max_dev)*2;
- unsigned int *isuper = (unsigned int*)sb;
+ __le32 *isuper = (__le32*)sb;
int i;
disk_csum = sb->sb_csum;
@@ -989,7 +990,7 @@ static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
newcsum += le32_to_cpu(*isuper++);
if (size == 2)
- newcsum += le16_to_cpu(*(unsigned short*) isuper);
+ newcsum += le16_to_cpu(*(__le16*) isuper);
csum = (newcsum & 0xffffffff) + (newcsum >> 32);
sb->sb_csum = disk_csum;
@@ -1106,7 +1107,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
if (le32_to_cpu(sb->chunksize))
rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
- if (le32_to_cpu(sb->size) > rdev->size*2)
+ if (le64_to_cpu(sb->size) > rdev->size*2)
return -EINVAL;
return ret;
}
@@ -1228,7 +1229,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
else
sb->resync_offset = cpu_to_le64(0);
- sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);
+ sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
sb->raid_disks = cpu_to_le32(mddev->raid_disks);
sb->size = cpu_to_le64(mddev->size<<1);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 171ff41b52b..a6260f0e3b9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -501,7 +501,7 @@ static int multipath_run (mddev_t *mddev)
mdname(mddev));
goto out_free_conf;
}
- mddev->degraded = conf->raid_disks = conf->working_disks;
+ mddev->degraded = conf->raid_disks - conf->working_disks;
conf->pool = mempool_create_kzalloc_pool(NR_RESERVED_BUFS,
sizeof(struct multipath_bh));
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1250f0eab4a..74f17a9a6eb 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2079,7 +2079,7 @@ static int run(mddev_t *mddev)
disk = conf->mirrors + i;
if (!disk->rdev ||
- !test_bit(In_sync, &rdev->flags)) {
+ !test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0;
mddev->degraded++;
}
diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index 91f95d172ca..01a5a702b03 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -127,7 +127,7 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
DECLARE_WAIT_QUEUE_HEAD(wq);
struct i2o_exec_wait *wait;
static u32 tcntxt = 0x80000000;
- long flags;
+ unsigned long flags;
int rc = 0;
wait = i2o_exec_wait_alloc();
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 2802db23d3c..44c9f993dcc 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -212,8 +212,8 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
break;
}
- free_index = pool->consumer_index++ % pool->size;
- pool->consumer_index = free_index;
+ free_index = pool->consumer_index;
+ pool->consumer_index = (pool->consumer_index + 1) % pool->size;
index = pool->free_map[free_index];
ibmveth_assert(index != IBM_VETH_INVALID_MAP);
@@ -329,8 +329,10 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, u64
adapter->rx_buff_pool[pool].buff_size,
DMA_FROM_DEVICE);
- free_index = adapter->rx_buff_pool[pool].producer_index++ % adapter->rx_buff_pool[pool].size;
- adapter->rx_buff_pool[pool].producer_index = free_index;
+ free_index = adapter->rx_buff_pool[pool].producer_index;
+ adapter->rx_buff_pool[pool].producer_index
+ = (adapter->rx_buff_pool[pool].producer_index + 1)
+ % adapter->rx_buff_pool[pool].size;
adapter->rx_buff_pool[pool].free_map[free_index] = index;
mb();
diff --git a/fs/dcache.c b/fs/dcache.c
index 2bac4ba1d1d..a1ff91eef10 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1469,23 +1469,21 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
* deleted it.
*/
-/**
- * d_move - move a dentry
+/*
+ * d_move_locked - move a dentry
* @dentry: entry to move
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
* dcache entries should not be moved in this way.
*/
-
-void d_move(struct dentry * dentry, struct dentry * target)
+static void d_move_locked(struct dentry * dentry, struct dentry * target)
{
struct hlist_head *list;
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
- spin_lock(&dcache_lock);
write_seqlock(&rename_lock);
/*
* XXXX: do we really need to take target->d_lock?
@@ -1536,10 +1534,84 @@ already_unhashed:
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
+}
+
+/**
+ * d_move - move a dentry
+ * @dentry: entry to move
+ * @target: new dentry
+ *
+ * Update the dcache to reflect the move of a file name. Negative
+ * dcache entries should not be moved in this way.
+ */
+
+void d_move(struct dentry * dentry, struct dentry * target)
+{
+ spin_lock(&dcache_lock);
+ d_move_locked(dentry, target);
spin_unlock(&dcache_lock);
}
/*
+ * Helper that returns 1 if p1 is a parent of p2, else 0
+ */
+static int d_isparent(struct dentry *p1, struct dentry *p2)
+{
+ struct dentry *p;
+
+ for (p = p2; p->d_parent != p; p = p->d_parent) {
+ if (p->d_parent == p1)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * This helper attempts to cope with remotely renamed directories
+ *
+ * It assumes that the caller is already holding
+ * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ *
+ * Note: If ever the locking in lock_rename() changes, then please
+ * remember to update this too...
+ *
+ * On return, dcache_lock will have been unlocked.
+ */
+static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+{
+ struct mutex *m1 = NULL, *m2 = NULL;
+ struct dentry *ret;
+
+ /* If alias and dentry share a parent, then no extra locks required */
+ if (alias->d_parent == dentry->d_parent)
+ goto out_unalias;
+
+ /* Check for loops */
+ ret = ERR_PTR(-ELOOP);
+ if (d_isparent(alias, dentry))
+ goto out_err;
+
+ /* See lock_rename() */
+ ret = ERR_PTR(-EBUSY);
+ if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
+ goto out_err;
+ m1 = &dentry->d_sb->s_vfs_rename_mutex;
+ if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+ goto out_err;
+ m2 = &alias->d_parent->d_inode->i_mutex;
+out_unalias:
+ d_move_locked(alias, dentry);
+ ret = alias;
+out_err:
+ spin_unlock(&dcache_lock);
+ if (m2)
+ mutex_unlock(m2);
+ if (m1)
+ mutex_unlock(m1);
+ return ret;
+}
+
+/*
* Prepare an anonymous dentry for life in the superblock's dentry tree as a
* named dentry in place of the dentry to be replaced.
*/
@@ -1581,7 +1653,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
*/
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
- struct dentry *alias, *actual;
+ struct dentry *actual;
BUG_ON(!d_unhashed(dentry));
@@ -1593,26 +1665,27 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
goto found_lock;
}
- /* See if a disconnected directory already exists as an anonymous root
- * that we should splice into the tree instead */
- if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
- spin_lock(&alias->d_lock);
-
- /* Is this a mountpoint that we could splice into our tree? */
- if (IS_ROOT(alias))
- goto connect_mountpoint;
-
- if (alias->d_name.len == dentry->d_name.len &&
- alias->d_parent == dentry->d_parent &&
- memcmp(alias->d_name.name,
- dentry->d_name.name,
- dentry->d_name.len) == 0)
- goto replace_with_alias;
-
- spin_unlock(&alias->d_lock);
-
- /* Doh! Seem to be aliasing directories for some reason... */
- dput(alias);
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *alias;
+
+ /* Does an aliased dentry already exist? */
+ alias = __d_find_alias(inode, 0);
+ if (alias) {
+ actual = alias;
+ /* Is this an anonymous mountpoint that we could splice
+ * into our tree? */
+ if (IS_ROOT(alias)) {
+ spin_lock(&alias->d_lock);
+ __d_materialise_dentry(dentry, alias);
+ __d_drop(alias);
+ goto found;
+ }
+ /* Nope, but we must(!) avoid directory aliasing */
+ actual = __d_unalias(dentry, alias);
+ if (IS_ERR(actual))
+ dput(alias);
+ goto out_nolock;
+ }
}
/* Add a unique reference */
@@ -1628,7 +1701,7 @@ found:
_d_rehash(actual);
spin_unlock(&actual->d_lock);
spin_unlock(&dcache_lock);
-
+out_nolock:
if (actual == dentry) {
security_d_instantiate(dentry, inode);
return NULL;
@@ -1637,16 +1710,6 @@ found:
iput(inode);
return actual;
- /* Convert the anonymous/root alias into an ordinary dentry */
-connect_mountpoint:
- __d_materialise_dentry(dentry, alias);
-
- /* Replace the candidate dentry with the alias in the tree */
-replace_with_alias:
- __d_drop(alias);
- actual = alias;
- goto found;
-
shouldnt_be_hashed:
spin_unlock(&dcache_lock);
BUG();
diff --git a/fs/inode.c b/fs/inode.c
index d9a21d12292..26cdb115ce6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode)
wake_up_bit(&inode->i_state, __I_LOCK);
}
+/*
+ * We rarely want to lock two inodes that do not have a parent/child
+ * relationship (such as directory, child inode) simultaneously. The
+ * vast majority of file systems should be able to get along fine
+ * without this. Do not use these functions except as a last resort.
+ */
+void inode_double_lock(struct inode *inode1, struct inode *inode2)
+{
+ if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
+ if (inode1)
+ mutex_lock(&inode1->i_mutex);
+ else if (inode2)
+ mutex_lock(&inode2->i_mutex);
+ return;
+ }
+
+ if (inode1 < inode2) {
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ } else {
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
+ }
+}
+EXPORT_SYMBOL(inode_double_lock);
+
+void inode_double_unlock(struct inode *inode1, struct inode *inode2)
+{
+ if (inode1)
+ mutex_unlock(&inode1->i_mutex);
+
+ if (inode2 && inode2 != inode1)
+ mutex_unlock(&inode2->i_mutex);
+}
+EXPORT_SYMBOL(inode_double_unlock);
+
static __initdata unsigned long ihash_entries;
static int __init set_ihash_entries(char *str)
{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4133ef5264e..b34cd16f472 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -935,8 +935,17 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
no_entry:
res = d_materialise_unique(dentry, inode);
- if (res != NULL)
+ if (res != NULL) {
+ struct dentry *parent;
+ if (IS_ERR(res))
+ goto out_unlock;
+ /* Was a directory renamed! */
+ parent = dget_parent(res);
+ if (!IS_ROOT(parent))
+ nfs_mark_for_revalidate(parent->d_inode);
+ dput(parent);
dentry = res;
+ }
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_unlock:
@@ -1132,6 +1141,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
alias = d_materialise_unique(dentry, inode);
if (alias != NULL) {
dput(dentry);
+ if (IS_ERR(alias))
+ return NULL;
dentry = alias;
}
diff --git a/fs/splice.c b/fs/splice.c
index a567010b62a..49fb9f12993 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -707,9 +707,9 @@ out_ret:
* key here is the 'actor' worker passed in that actually moves the data
* to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
*/
-ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags,
- splice_actor *actor)
+static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos, size_t len,
+ unsigned int flags, splice_actor *actor)
{
int ret, do_wakeup, err;
struct splice_desc sd;
@@ -722,9 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
sd.file = out;
sd.pos = *ppos;
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
-
for (;;) {
if (pipe->nrbufs) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
@@ -797,9 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
pipe_wait(pipe);
}
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
-
if (do_wakeup) {
smp_mb();
if (waitqueue_active(&pipe->wait))
@@ -810,6 +804,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
return ret;
}
+ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags,
+ splice_actor *actor)
+{
+ ssize_t ret;
+ struct inode *inode = out->f_mapping->host;
+
+ /*
+ * The actor worker might be calling ->prepare_write and
+ * ->commit_write. Most of the time, these expect i_mutex to
+ * be held. Since this may result in an ABBA deadlock with
+ * pipe->inode, we have to order lock acquiry here.
+ */
+ inode_double_lock(inode, pipe->inode);
+ ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+ inode_double_unlock(inode, pipe->inode);
+
+ return ret;
+}
+
+/**
+ * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
+ * @pipe: pipe info
+ * @out: file to write to
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file. The caller is responsible
+ * for acquiring i_mutex on both inodes.
+ *
+ */
+ssize_t
+generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct address_space *mapping = out->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
+ int err;
+
+ err = remove_suid(out->f_dentry);
+ if (unlikely(err))
+ return err;
+
+ ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+ if (ret > 0) {
+ *ppos += ret;
+
+ /*
+ * If file or inode is SYNC and we actually wrote some data,
+ * sync it.
+ */
+ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+ err = generic_osync_inode(inode, mapping,
+ OSYNC_METADATA|OSYNC_DATA);
+
+ if (err)
+ ret = err;
+ }
+ }
+
+ return ret;
+}
+
+EXPORT_SYMBOL(generic_file_splice_write_nolock);
+
/**
* generic_file_splice_write - splice data from a pipe to a file
* @pipe: pipe info
@@ -826,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
struct address_space *mapping = out->f_mapping;
+ struct inode *inode = mapping->host;
ssize_t ret;
+ int err;
+
+ err = should_remove_suid(out->f_dentry);
+ if (unlikely(err)) {
+ mutex_lock(&inode->i_mutex);
+ err = __remove_suid(out->f_dentry, err);
+ mutex_unlock(&inode->i_mutex);
+ if (err)
+ return err;
+ }
ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
if (ret > 0) {
- struct inode *inode = mapping->host;
-
*ppos += ret;
/*
@@ -839,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
* sync it.
*/
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
- int err;
-
mutex_lock(&inode->i_mutex);
err = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
@@ -1400,13 +1468,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* grabbing by inode address. Otherwise two different processes
* could deadlock (one doing tee from A -> B, the other from B -> A).
*/
- if (ipipe->inode < opipe->inode) {
- mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD);
- }
+ inode_double_lock(ipipe->inode, opipe->inode);
do {
if (!opipe->readers) {
@@ -1450,8 +1512,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
i++;
} while (len);
- mutex_unlock(&ipipe->inode->i_mutex);
- mutex_unlock(&opipe->inode->i_mutex);
+ inode_double_unlock(ipipe->inode, opipe->inode);
/*
* If we put data in the output pipe, wakeup any potential readers.
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index cfdb4f6a89d..c26c3adcfac 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -131,6 +131,7 @@ COMPATIBLE_IOCTL(RUN_ARRAY)
COMPATIBLE_IOCTL(STOP_ARRAY)
COMPATIBLE_IOCTL(STOP_ARRAY_RO)
COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
+COMPATIBLE_IOCTL(GET_BITMAP_FILE)
ULONG_IOCTL(SET_BITMAP_FILE)
/* DM */
COMPATIBLE_IOCTL(DM_VERSION_32)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 661c7c57214..2fe6e3f900b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class
I_MUTEX_QUOTA
};
+extern void inode_double_lock(struct inode *inode1, struct inode *inode2);
+extern void inode_double_unlock(struct inode *inode1, struct inode *inode2);
+
/*
* NOTE: in a 32bit arch with a preemptable kernel and
* an UP compile the i_size_read/write must be atomic
@@ -1709,6 +1712,8 @@ extern void __iget(struct inode * inode);
extern void clear_inode(struct inode *);
extern void destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
+extern int __remove_suid(struct dentry *, int);
+extern int should_remove_suid(struct dentry *);
extern int remove_suid(struct dentry *);
extern void remove_dquot_ref(struct super_block *, int, struct list_head *);
@@ -1755,6 +1760,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
struct pipe_inode_info *, size_t, unsigned int);
extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
+extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
+ struct file *, loff_t *, size_t, unsigned int);
extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
struct file *out, loff_t *, size_t len, unsigned int flags);
extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 09f0f575ddf..daabb3aa1ec 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -150,7 +150,7 @@ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
extern void mpol_fix_fork_child_flag(struct task_struct *p);
#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
-#ifdef CONFIG_CPUSET
+#ifdef CONFIG_CPUSETS
#define current_cpuset_is_being_rebound() \
(cpuset_being_rebound == current->cpuset)
#else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 59855b8718a..ed0762b283a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -674,6 +674,12 @@ void sparse_init(void);
#define sparse_index_init(_sec, _nid) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid))
+#else
+#define early_pfn_in_nid(pfn, nid) (1)
+#endif
+
#ifndef early_pfn_valid
#define early_pfn_valid(pfn) (1)
#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4689e2a699c..09be0f81b27 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -455,7 +455,11 @@ int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap);
int pci_find_ext_capability (struct pci_dev *dev, int cap);
struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
-struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from);
+struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device,
+ struct pci_dev *from);
+struct pci_dev *pci_get_device_reverse(unsigned int vendor, unsigned int device,
+ struct pci_dev *from);
+
struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device,
unsigned int ss_vendor, unsigned int ss_device,
struct pci_dev *from);
@@ -660,7 +664,12 @@ static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int
static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn)
{ return NULL; }
-static inline struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from)
+static inline struct pci_dev *pci_get_device(unsigned int vendor,
+ unsigned int device, struct pci_dev *from)
+{ return NULL; }
+
+static inline struct pci_dev *pci_get_device_reverse(unsigned int vendor,
+ unsigned int device, struct pci_dev *from)
{ return NULL; }
static inline struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device,
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index 84d88775185..ebd42a3710b 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -146,16 +146,16 @@ enum bitmap_state {
/* the superblock at the front of the bitmap file -- little endian */
typedef struct bitmap_super_s {
- __u32 magic; /* 0 BITMAP_MAGIC */
- __u32 version; /* 4 the bitmap major for now, could change... */
- __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */
- __u64 events; /* 24 event counter for the bitmap (1)*/
- __u64 events_cleared;/*32 event counter when last bit cleared (2) */
- __u64 sync_size; /* 40 the size of the md device's sync range(3) */
- __u32 state; /* 48 bitmap state information */
- __u32 chunksize; /* 52 the bitmap chunk size in bytes */
- __u32 daemon_sleep; /* 56 seconds between disk flushes */
- __u32 write_behind; /* 60 number of outstanding write-behind writes */
+ __le32 magic; /* 0 BITMAP_MAGIC */
+ __le32 version; /* 4 the bitmap major for now, could change... */
+ __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */
+ __le64 events; /* 24 event counter for the bitmap (1)*/
+ __le64 events_cleared;/*32 event counter when last bit cleared (2) */
+ __le64 sync_size; /* 40 the size of the md device's sync range(3) */
+ __le32 state; /* 48 bitmap state information */
+ __le32 chunksize; /* 52 the bitmap chunk size in bytes */
+ __le32 daemon_sleep; /* 56 seconds between disk flushes */
+ __le32 write_behind; /* 60 number of outstanding write-behind writes */
__u8 pad[256 - 64]; /* set to zero */
} bitmap_super_t;
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h
index b6ebc69bae5..3f2cd98c508 100644
--- a/include/linux/raid/md_p.h
+++ b/include/linux/raid/md_p.h
@@ -206,52 +206,52 @@ static inline __u64 md_event(mdp_super_t *sb) {
*/
struct mdp_superblock_1 {
/* constant array information - 128 bytes */
- __u32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */
- __u32 major_version; /* 1 */
- __u32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */
- __u32 pad0; /* always set to 0 when writing */
+ __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */
+ __le32 major_version; /* 1 */
+ __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */
+ __le32 pad0; /* always set to 0 when writing */
__u8 set_uuid[16]; /* user-space generated. */
char set_name[32]; /* set and interpreted by user-space */
- __u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
- __u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
- __u32 layout; /* only for raid5 and raid10 currently */
- __u64 size; /* used size of component devices, in 512byte sectors */
+ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
+ __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */
+ __le32 layout; /* only for raid5 and raid10 currently */
+ __le64 size; /* used size of component devices, in 512byte sectors */
- __u32 chunksize; /* in 512byte sectors */
- __u32 raid_disks;
- __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts
+ __le32 chunksize; /* in 512byte sectors */
+ __le32 raid_disks;
+ __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts
* NOTE: signed, so bitmap can be before superblock
* only meaningful of feature_map[0] is set.
*/
/* These are only valid with feature bit '4' */
- __u32 new_level; /* new level we are reshaping to */
- __u64 reshape_position; /* next address in array-space for reshape */
- __u32 delta_disks; /* change in number of raid_disks */
- __u32 new_layout; /* new layout */
- __u32 new_chunk; /* new chunk size (bytes) */
+ __le32 new_level; /* new level we are reshaping to */
+ __le64 reshape_position; /* next address in array-space for reshape */
+ __le32 delta_disks; /* change in number of raid_disks */
+ __le32 new_layout; /* new layout */
+ __le32 new_chunk; /* new chunk size (bytes) */
__u8 pad1[128-124]; /* set to 0 when written */
/* constant this-device information - 64 bytes */
- __u64 data_offset; /* sector start of data, often 0 */
- __u64 data_size; /* sectors in this device that can be used for data */
- __u64 super_offset; /* sector start of this superblock */
- __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
- __u32 dev_number; /* permanent identifier of this device - not role in raid */
- __u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
+ __le64 data_offset; /* sector start of data, often 0 */
+ __le64 data_size; /* sectors in this device that can be used for data */
+ __le64 super_offset; /* sector start of this superblock */
+ __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
+ __le32 dev_number; /* permanent identifier of this device - not role in raid */
+ __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
__u8 devflags; /* per-device flags. Only one defined...*/
#define WriteMostly1 1 /* mask for writemostly flag in above */
__u8 pad2[64-57]; /* set to 0 when writing */
/* array state information - 64 bytes */
- __u64 utime; /* 40 bits second, 24 btes microseconds */
- __u64 events; /* incremented when superblock updated */
- __u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
- __u32 sb_csum; /* checksum upto devs[max_dev] */
- __u32 max_dev; /* size of devs[] array to consider */
+ __le64 utime; /* 40 bits second, 24 btes microseconds */
+ __le64 events; /* incremented when superblock updated */
+ __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
+ __le32 sb_csum; /* checksum upto devs[max_dev] */
+ __le32 max_dev; /* size of devs[] array to consider */
__u8 pad3[64-32]; /* set to 0 when writing */
/* device state information. Indexed by dev_number.
@@ -260,7 +260,7 @@ struct mdp_superblock_1 {
* into the 'roles' value. If a device is spare or faulty, then it doesn't
* have a meaningful role.
*/
- __u16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */
+ __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */
};
/* feature_map bits */
diff --git a/mm/filemap.c b/mm/filemap.c
index 8558732e85c..cb26e33fd0f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1884,11 +1884,10 @@ repeat:
* if suid or (sgid and xgrp)
* remove privs
*/
-int remove_suid(struct dentry *dentry)
+int should_remove_suid(struct dentry *dentry)
{
mode_t mode = dentry->d_inode->i_mode;
int kill = 0;
- int result = 0;
/* suid always must be killed */
if (unlikely(mode & S_ISUID))
@@ -1901,13 +1900,28 @@ int remove_suid(struct dentry *dentry)
if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
kill |= ATTR_KILL_SGID;
- if (unlikely(kill && !capable(CAP_FSETID))) {
- struct iattr newattrs;
+ if (unlikely(kill && !capable(CAP_FSETID)))
+ return kill;
- newattrs.ia_valid = ATTR_FORCE | kill;
- result = notify_change(dentry, &newattrs);
- }
- return result;
+ return 0;
+}
+
+int __remove_suid(struct dentry *dentry, int kill)
+{
+ struct iattr newattrs;
+
+ newattrs.ia_valid = ATTR_FORCE | kill;
+ return notify_change(dentry, &newattrs);
+}
+
+int remove_suid(struct dentry *dentry)
+{
+ int kill = should_remove_suid(dentry);
+
+ if (unlikely(kill))
+ return __remove_suid(dentry, kill);
+
+ return 0;
}
EXPORT_SYMBOL(remove_suid);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ebd425c2e2a..f5fc45472d5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1689,6 +1689,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
if (!early_pfn_valid(pfn))
continue;
+ if (!early_pfn_in_nid(pfn, nid))
+ continue;
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
init_page_count(page);
diff --git a/mm/slab.c b/mm/slab.c
index 266449d604b..84c631f3074 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3152,12 +3152,15 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
struct zone **z;
void *obj = NULL;
- for (z = zonelist->zones; *z && !obj; z++)
+ for (z = zonelist->zones; *z && !obj; z++) {
+ int nid = zone_to_nid(*z);
+
if (zone_idx(*z) <= ZONE_NORMAL &&
- cpuset_zone_allowed(*z, flags))
+ cpuset_zone_allowed(*z, flags) &&
+ cache->nodelists[nid])
obj = __cache_alloc_node(cache,
- flags | __GFP_THISNODE,
- zone_to_nid(*z));
+ flags | __GFP_THISNODE, nid);
+ }
return obj;
}