-rw-r--r--  arch/powerpc/Kconfig                     |   9
-rw-r--r--  arch/powerpc/configs/pseries_defconfig   |   1
-rw-r--r--  drivers/block/cciss.c                    |  22
-rw-r--r--  drivers/char/moxa.c                      |   9
-rw-r--r--  drivers/char/rio/host.h                  |   1
-rw-r--r--  drivers/char/rio/rio_linux.c             |   9
-rw-r--r--  drivers/clocksource/acpi_pm.c            |   6
-rw-r--r--  drivers/ide/pci/generic.c                |  13
-rw-r--r--  drivers/md/bitmap.c                      |  10
-rw-r--r--  drivers/md/md.c                          |  13
-rw-r--r--  drivers/md/multipath.c                   |   2
-rw-r--r--  drivers/md/raid10.c                      |   2
-rw-r--r--  drivers/message/i2o/exec-osm.c           |   2
-rw-r--r--  drivers/net/ibmveth.c                    |  10
-rw-r--r--  fs/dcache.c                              | 137
-rw-r--r--  fs/inode.c                               |  36
-rw-r--r--  fs/nfs/dir.c                             |  13
-rw-r--r--  fs/splice.c                              | 105
-rw-r--r--  include/linux/compat_ioctl.h             |   1
-rw-r--r--  include/linux/fs.h                       |   7
-rw-r--r--  include/linux/mempolicy.h                |   2
-rw-r--r--  include/linux/mmzone.h                   |   6
-rw-r--r--  include/linux/pci.h                      |  13
-rw-r--r--  include/linux/raid/bitmap.h              |  20
-rw-r--r--  include/linux/raid/md_p.h                |  56
-rw-r--r--  mm/filemap.c                             |  30
-rw-r--r--  mm/page_alloc.c                          |   2
-rw-r--r--  mm/slab.c                                |  11
28 files changed, 402 insertions, 146 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8b691046557..2bd9b7fb0f6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -751,6 +751,15 @@ config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. See memmap_init_zone() +# for details. +config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + config PPC_64K_PAGES bool "64k page size" depends on PPC64 diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 9828663652e..d2833c1a1f3 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -184,6 +184,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y +CONFIG_NODES_SPAN_OTHER_NODES=y # CONFIG_PPC_64K_PAGES is not set CONFIG_SCHED_SMT=y CONFIG_PROC_DEVICETREE=y diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index dcccaf2782f..bc6602606fb 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1923,7 +1923,6 @@ static void cciss_geometry_inquiry(int ctlr, int logvol, { int return_code; unsigned long t; - unsigned long rem; memset(inq_buff, 0, sizeof(InquiryData_struct)); if (withirq) @@ -1939,26 +1938,23 @@ static void cciss_geometry_inquiry(int ctlr, int logvol, printk(KERN_WARNING "cciss: reading geometry failed, volume " "does not support reading geometry\n"); - drv->block_size = block_size; - drv->nr_blocks = total_size; drv->heads = 255; drv->sectors = 32; // Sectors per track - t = drv->heads * drv->sectors; - drv->cylinders = total_size; - rem = do_div(drv->cylinders, t); } else { - drv->block_size = block_size; - drv->nr_blocks = total_size; drv->heads = inq_buff->data_byte[6]; drv->sectors = inq_buff->data_byte[7]; drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8; drv->cylinders += inq_buff->data_byte[5]; drv->raid_level = inq_buff->data_byte[8]; - t = drv->heads * drv->sectors; - if (t > 1) { - drv->cylinders = total_size; - rem = do_div(drv->cylinders, t); - } + } + drv->block_size = block_size; + drv->nr_blocks = total_size; + t = drv->heads * drv->sectors; + if (t > 1) { + unsigned rem = sector_div(total_size, t); + if (rem) + total_size++; + drv->cylinders = total_size; } } else { /* Get geometry failed */ printk(KERN_WARNING "cciss: reading geometry failed\n"); diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index b401383808c..96cb1f07332 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -130,6 +130,7 @@ static moxa_isa_board_conf moxa_isa_boards[] = typedef struct _moxa_pci_devinfo { ushort busNum; ushort devNum; + struct pci_dev *pdev; } moxa_pci_devinfo; typedef struct _moxa_board_conf { @@ -324,6 +325,9 @@ static int moxa_get_PCI_conf(struct pci_dev *p, int board_type, moxa_board_conf board->busType = MOXA_BUS_TYPE_PCI; board->pciInfo.busNum = p->bus->number; board->pciInfo.devNum = p->devfn >> 3; + board->pciInfo.pdev = p; + /* don't lose the reference in the next pci_get_device iteration */ + pci_dev_get(p); return (0); } @@ -493,6 +497,11 @@ static void __exit moxa_exit(void) if (tty_unregister_driver(moxaDriver)) printk("Couldn't unregister MOXA Intellio family serial driver\n"); put_tty_driver(moxaDriver); + + for (i = 0; i < MAX_BOARDS; i++) + if (moxa_boards[i].busType == MOXA_BUS_TYPE_PCI) + pci_dev_put(moxa_boards[i].pciInfo.pdev); + if (verbose) 
printk("Done\n"); } diff --git a/drivers/char/rio/host.h b/drivers/char/rio/host.h index ee2ddea7a63..23d0681fe49 100644 --- a/drivers/char/rio/host.h +++ b/drivers/char/rio/host.h @@ -44,6 +44,7 @@ ** the host. */ struct Host { + struct pci_dev *pdev; unsigned char Type; /* RIO_EISA, RIO_MCA, ... */ unsigned char Ivec; /* POLLED or ivec number */ unsigned char Mode; /* Control stuff */ diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index c382df0f82f..7ac68cb3bed 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -1017,6 +1017,10 @@ static int __init rio_init(void) rio_dprintk(RIO_DEBUG_PROBE, "Hmm Tested ok, uniqid = %x.\n", p->RIOHosts[p->RIONumHosts].UniqueNum); fix_rio_pci(pdev); + + p->RIOHosts[p->RIONumHosts].pdev = pdev; + pci_dev_get(pdev); + p->RIOLastPCISearch = 0; p->RIONumHosts++; found++; @@ -1066,6 +1070,9 @@ static int __init rio_init(void) ((readb(&p->RIOHosts[p->RIONumHosts].Unique[1]) & 0xFF) << 8) | ((readb(&p->RIOHosts[p->RIONumHosts].Unique[2]) & 0xFF) << 16) | ((readb(&p->RIOHosts[p->RIONumHosts].Unique[3]) & 0xFF) << 24); rio_dprintk(RIO_DEBUG_PROBE, "Hmm Tested ok, uniqid = %x.\n", p->RIOHosts[p->RIONumHosts].UniqueNum); + p->RIOHosts[p->RIONumHosts].pdev = pdev; + pci_dev_get(pdev); + p->RIOLastPCISearch = 0; p->RIONumHosts++; found++; @@ -1181,6 +1188,8 @@ static void __exit rio_exit(void) } /* It is safe/allowed to del_timer a non-active timer */ del_timer(&hp->timer); + if (hp->Type == RIO_PCI) + pci_dev_put(hp->pdev); } if (misc_deregister(&rio_fw_device) < 0) { diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 7ad3be8c0f4..7fcb77a9d01 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -54,8 +54,8 @@ static cycle_t acpi_pm_read_verified(void) v1 = read_pmtmr(); v2 = read_pmtmr(); v3 = read_pmtmr(); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); + } while (unlikely((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2))); return (cycle_t)v2; } @@ -138,6 +138,8 @@ static void __devinit acpi_pm_check_graylist(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, acpi_pm_check_graylist); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, + acpi_pm_check_graylist); #endif diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c index 5b77a5bcbf0..ad418ce882c 100644 --- a/drivers/ide/pci/generic.c +++ b/drivers/ide/pci/generic.c @@ -40,6 +40,19 @@ static int ide_generic_all; /* Set to claim all devices */ +/* + * the module_param_named() was added for the modular case + * the __setup() is left as compatibility for existing setups + */ +#ifndef MODULE +static int __init ide_generic_all_on(char *unused) +{ + ide_generic_all = 1; + printk(KERN_INFO "IDE generic will claim all unknown PCI IDE storage controllers."); + return 1; +} +__setup("all-generic-ide", ide_generic_all_on); +#endif module_param_named(all_generic_ide, ide_generic_all, bool, 0444); MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers."); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index d47d38ac71b..d6f614738bb 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -536,7 +536,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " "-- forcing full recovery\n", bmname(bitmap), events, (unsigned long long) 
bitmap->mddev->events); - sb->state |= BITMAP_STALE; + sb->state |= cpu_to_le32(BITMAP_STALE); } success: /* assign fields using values from superblock */ @@ -544,11 +544,11 @@ success: bitmap->daemon_sleep = daemon_sleep; bitmap->daemon_lastrun = jiffies; bitmap->max_write_behind = write_behind; - bitmap->flags |= sb->state; + bitmap->flags |= le32_to_cpu(sb->state); if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) bitmap->flags |= BITMAP_HOSTENDIAN; bitmap->events_cleared = le64_to_cpu(sb->events_cleared); - if (sb->state & BITMAP_STALE) + if (sb->state & cpu_to_le32(BITMAP_STALE)) bitmap->events_cleared = bitmap->mddev->events; err = 0; out: @@ -578,9 +578,9 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, spin_unlock_irqrestore(&bitmap->lock, flags); sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); switch (op) { - case MASK_SET: sb->state |= bits; + case MASK_SET: sb->state |= cpu_to_le32(bits); break; - case MASK_UNSET: sb->state &= ~bits; + case MASK_UNSET: sb->state &= cpu_to_le32(~bits); break; default: BUG(); } diff --git a/drivers/md/md.c b/drivers/md/md.c index f7f19088f3b..7daa7b1e145 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -974,12 +974,13 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) * version 1 superblock */ -static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb) +static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb) { - unsigned int disk_csum, csum; + __le32 disk_csum; + u32 csum; unsigned long long newcsum; int size = 256 + le32_to_cpu(sb->max_dev)*2; - unsigned int *isuper = (unsigned int*)sb; + __le32 *isuper = (__le32*)sb; int i; disk_csum = sb->sb_csum; @@ -989,7 +990,7 @@ static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb) newcsum += le32_to_cpu(*isuper++); if (size == 2) - newcsum += le16_to_cpu(*(unsigned short*) isuper); + newcsum += le16_to_cpu(*(__le16*) isuper); csum = (newcsum & 0xffffffff) + (newcsum >> 32); sb->sb_csum = disk_csum; @@ -1106,7 +1107,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (le32_to_cpu(sb->chunksize)) rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1); - if (le32_to_cpu(sb->size) > rdev->size*2) + if (le64_to_cpu(sb->size) > rdev->size*2) return -EINVAL; return ret; } @@ -1228,7 +1229,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) else sb->resync_offset = cpu_to_le64(0); - sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors); + sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors)); sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->size<<1); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 171ff41b52b..a6260f0e3b9 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -501,7 +501,7 @@ static int multipath_run (mddev_t *mddev) mdname(mddev)); goto out_free_conf; } - mddev->degraded = conf->raid_disks = conf->working_disks; + mddev->degraded = conf->raid_disks - conf->working_disks; conf->pool = mempool_create_kzalloc_pool(NR_RESERVED_BUFS, sizeof(struct multipath_bh)); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1250f0eab4a..74f17a9a6eb 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2079,7 +2079,7 @@ static int run(mddev_t *mddev) disk = conf->mirrors + i; if (!disk->rdev || - !test_bit(In_sync, &rdev->flags)) { + !test_bit(In_sync, &disk->rdev->flags)) { disk->head_position = 0; mddev->degraded++; } diff --git a/drivers/message/i2o/exec-osm.c 
b/drivers/message/i2o/exec-osm.c index 91f95d172ca..01a5a702b03 100644 --- a/drivers/message/i2o/exec-osm.c +++ b/drivers/message/i2o/exec-osm.c @@ -127,7 +127,7 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg, DECLARE_WAIT_QUEUE_HEAD(wq); struct i2o_exec_wait *wait; static u32 tcntxt = 0x80000000; - long flags; + unsigned long flags; int rc = 0; wait = i2o_exec_wait_alloc(); diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 2802db23d3c..44c9f993dcc 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -212,8 +212,8 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc break; } - free_index = pool->consumer_index++ % pool->size; - pool->consumer_index = free_index; + free_index = pool->consumer_index; + pool->consumer_index = (pool->consumer_index + 1) % pool->size; index = pool->free_map[free_index]; ibmveth_assert(index != IBM_VETH_INVALID_MAP); @@ -329,8 +329,10 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, u64 adapter->rx_buff_pool[pool].buff_size, DMA_FROM_DEVICE); - free_index = adapter->rx_buff_pool[pool].producer_index++ % adapter->rx_buff_pool[pool].size; - adapter->rx_buff_pool[pool].producer_index = free_index; + free_index = adapter->rx_buff_pool[pool].producer_index; + adapter->rx_buff_pool[pool].producer_index + = (adapter->rx_buff_pool[pool].producer_index + 1) + % adapter->rx_buff_pool[pool].size; adapter->rx_buff_pool[pool].free_map[free_index] = index; mb(); diff --git a/fs/dcache.c b/fs/dcache.c index 2bac4ba1d1d..a1ff91eef10 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1469,23 +1469,21 @@ static void switch_names(struct dentry *dentry, struct dentry *target) * deleted it. */ -/** - * d_move - move a dentry +/* + * d_move_locked - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. */ - -void d_move(struct dentry * dentry, struct dentry * target) +static void d_move_locked(struct dentry * dentry, struct dentry * target) { struct hlist_head *list; if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); - spin_lock(&dcache_lock); write_seqlock(&rename_lock); /* * XXXX: do we really need to take target->d_lock? @@ -1536,10 +1534,84 @@ already_unhashed: fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); write_sequnlock(&rename_lock); +} + +/** + * d_move - move a dentry + * @dentry: entry to move + * @target: new dentry + * + * Update the dcache to reflect the move of a file name. Negative + * dcache entries should not be moved in this way. + */ + +void d_move(struct dentry * dentry, struct dentry * target) +{ + spin_lock(&dcache_lock); + d_move_locked(dentry, target); spin_unlock(&dcache_lock); } /* + * Helper that returns 1 if p1 is a parent of p2, else 0 + */ +static int d_isparent(struct dentry *p1, struct dentry *p2) +{ + struct dentry *p; + + for (p = p2; p->d_parent != p; p = p->d_parent) { + if (p->d_parent == p1) + return 1; + } + return 0; +} + +/* + * This helper attempts to cope with remotely renamed directories + * + * It assumes that the caller is already holding + * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * + * Note: If ever the locking in lock_rename() changes, then please + * remember to update this too... + * + * On return, dcache_lock will have been unlocked. 
+ */ +static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) +{ + struct mutex *m1 = NULL, *m2 = NULL; + struct dentry *ret; + + /* If alias and dentry share a parent, then no extra locks required */ + if (alias->d_parent == dentry->d_parent) + goto out_unalias; + + /* Check for loops */ + ret = ERR_PTR(-ELOOP); + if (d_isparent(alias, dentry)) + goto out_err; + + /* See lock_rename() */ + ret = ERR_PTR(-EBUSY); + if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) + goto out_err; + m1 = &dentry->d_sb->s_vfs_rename_mutex; + if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) + goto out_err; + m2 = &alias->d_parent->d_inode->i_mutex; +out_unalias: + d_move_locked(alias, dentry); + ret = alias; +out_err: + spin_unlock(&dcache_lock); + if (m2) + mutex_unlock(m2); + if (m1) + mutex_unlock(m1); + return ret; +} + +/* * Prepare an anonymous dentry for life in the superblock's dentry tree as a * named dentry in place of the dentry to be replaced. */ @@ -1581,7 +1653,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) */ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) { - struct dentry *alias, *actual; + struct dentry *actual; BUG_ON(!d_unhashed(dentry)); @@ -1593,26 +1665,27 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) goto found_lock; } - /* See if a disconnected directory already exists as an anonymous root - * that we should splice into the tree instead */ - if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) { - spin_lock(&alias->d_lock); - - /* Is this a mountpoint that we could splice into our tree? */ - if (IS_ROOT(alias)) - goto connect_mountpoint; - - if (alias->d_name.len == dentry->d_name.len && - alias->d_parent == dentry->d_parent && - memcmp(alias->d_name.name, - dentry->d_name.name, - dentry->d_name.len) == 0) - goto replace_with_alias; - - spin_unlock(&alias->d_lock); - - /* Doh! Seem to be aliasing directories for some reason... */ - dput(alias); + if (S_ISDIR(inode->i_mode)) { + struct dentry *alias; + + /* Does an aliased dentry already exist? */ + alias = __d_find_alias(inode, 0); + if (alias) { + actual = alias; + /* Is this an anonymous mountpoint that we could splice + * into our tree? */ + if (IS_ROOT(alias)) { + spin_lock(&alias->d_lock); + __d_materialise_dentry(dentry, alias); + __d_drop(alias); + goto found; + } + /* Nope, but we must(!) avoid directory aliasing */ + actual = __d_unalias(dentry, alias); + if (IS_ERR(actual)) + dput(alias); + goto out_nolock; + } } /* Add a unique reference */ @@ -1628,7 +1701,7 @@ found: _d_rehash(actual); spin_unlock(&actual->d_lock); spin_unlock(&dcache_lock); - +out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); return NULL; @@ -1637,16 +1710,6 @@ found: iput(inode); return actual; - /* Convert the anonymous/root alias into an ordinary dentry */ -connect_mountpoint: - __d_materialise_dentry(dentry, alias); - - /* Replace the candidate dentry with the alias in the tree */ -replace_with_alias: - __d_drop(alias); - actual = alias; - goto found; - shouldnt_be_hashed: spin_unlock(&dcache_lock); BUG(); diff --git a/fs/inode.c b/fs/inode.c index d9a21d12292..26cdb115ce6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_LOCK); } +/* + * We rarely want to lock two inodes that do not have a parent/child + * relationship (such as directory, child inode) simultaneously. 
The + * vast majority of file systems should be able to get along fine + * without this. Do not use these functions except as a last resort. + */ +void inode_double_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { + if (inode1) + mutex_lock(&inode1->i_mutex); + else if (inode2) + mutex_lock(&inode2->i_mutex); + return; + } + + if (inode1 < inode2) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +} +EXPORT_SYMBOL(inode_double_lock); + +void inode_double_unlock(struct inode *inode1, struct inode *inode2) +{ + if (inode1) + mutex_unlock(&inode1->i_mutex); + + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} +EXPORT_SYMBOL(inode_double_unlock); + static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4133ef5264e..b34cd16f472 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -935,8 +935,17 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru no_entry: res = d_materialise_unique(dentry, inode); - if (res != NULL) + if (res != NULL) { + struct dentry *parent; + if (IS_ERR(res)) + goto out_unlock; + /* Was a directory renamed! */ + parent = dget_parent(res); + if (!IS_ROOT(parent)) + nfs_mark_for_revalidate(parent->d_inode); + dput(parent); dentry = res; + } nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: @@ -1132,6 +1141,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) alias = d_materialise_unique(dentry, inode); if (alias != NULL) { dput(dentry); + if (IS_ERR(alias)) + return NULL; dentry = alias; } diff --git a/fs/splice.c b/fs/splice.c index a567010b62a..49fb9f12993 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -707,9 +707,9 @@ out_ret: * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags, - splice_actor *actor) +static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; @@ -722,9 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.file = out; sd.pos = *ppos; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); - for (;;) { if (pipe->nrbufs) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; @@ -797,9 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, pipe_wait(pipe); } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - if (do_wakeup) { smp_mb(); if (waitqueue_active(&pipe->wait)) @@ -810,6 +804,73 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, return ret; } +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor) +{ + ssize_t ret; + struct inode *inode = out->f_mapping->host; + + /* + * The actor worker might be calling ->prepare_write and + * ->commit_write. Most of the time, these expect i_mutex to + * be held. 
Since this may result in an ABBA deadlock with + * pipe->inode, we have to order lock acquiry here. + */ + inode_double_lock(inode, pipe->inode); + ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); + inode_double_unlock(inode, pipe->inode); + + return ret; +} + +/** + * generic_file_splice_write_nolock - generic_file_splice_write without mutexes + * @pipe: pipe info + * @out: file to write to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. The caller is responsible + * for acquiring i_mutex on both inodes. + * + */ +ssize_t +generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + int err; + + err = remove_suid(out->f_dentry); + if (unlikely(err)) + return err; + + ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + if (ret > 0) { + *ppos += ret; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + + if (err) + ret = err; + } + } + + return ret; +} + +EXPORT_SYMBOL(generic_file_splice_write_nolock); + /** * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info @@ -826,12 +887,21 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; ssize_t ret; + int err; + + err = should_remove_suid(out->f_dentry); + if (unlikely(err)) { + mutex_lock(&inode->i_mutex); + err = __remove_suid(out->f_dentry, err); + mutex_unlock(&inode->i_mutex); + if (err) + return err; + } ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { - struct inode *inode = mapping->host; - *ppos += ret; /* @@ -839,8 +909,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; - mutex_lock(&inode->i_mutex); err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); @@ -1400,13 +1468,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * grabbing by inode address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - if (ipipe->inode < opipe->inode) { - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); - } + inode_double_lock(ipipe->inode, opipe->inode); do { if (!opipe->readers) { @@ -1450,8 +1512,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, i++; } while (len); - mutex_unlock(&ipipe->inode->i_mutex); - mutex_unlock(&opipe->inode->i_mutex); + inode_double_unlock(ipipe->inode, opipe->inode); /* * If we put data in the output pipe, wakeup any potential readers. 
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index cfdb4f6a89d..c26c3adcfac 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -131,6 +131,7 @@ COMPATIBLE_IOCTL(RUN_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY_RO) COMPATIBLE_IOCTL(RESTART_ARRAY_RW) +COMPATIBLE_IOCTL(GET_BITMAP_FILE) ULONG_IOCTL(SET_BITMAP_FILE) /* DM */ COMPATIBLE_IOCTL(DM_VERSION_32) diff --git a/include/linux/fs.h b/include/linux/fs.h index 661c7c57214..2fe6e3f900b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; +extern void inode_double_lock(struct inode *inode1, struct inode *inode2); +extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); + /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic @@ -1709,6 +1712,8 @@ extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern int __remove_suid(struct dentry *, int); +extern int should_remove_suid(struct dentry *); extern int remove_suid(struct dentry *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *); @@ -1755,6 +1760,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); +extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, + struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 09f0f575ddf..daabb3aa1ec 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -150,7 +150,7 @@ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern void mpol_fix_fork_child_flag(struct task_struct *p); #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) -#ifdef CONFIG_CPUSET +#ifdef CONFIG_CPUSETS #define current_cpuset_is_being_rebound() \ (cpuset_being_rebound == current->cpuset) #else diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 59855b8718a..ed0762b283a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -674,6 +674,12 @@ void sparse_init(void); #define sparse_index_init(_sec, _nid) do {} while (0) #endif /* CONFIG_SPARSEMEM */ +#ifdef CONFIG_NODES_SPAN_OTHER_NODES +#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) +#else +#define early_pfn_in_nid(pfn, nid) (1) +#endif + #ifndef early_pfn_valid #define early_pfn_valid(pfn) (1) #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index 4689e2a699c..09be0f81b27 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -455,7 +455,11 @@ int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability (struct pci_dev *dev, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); -struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from); +struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, + struct pci_dev *from); +struct pci_dev *pci_get_device_reverse(unsigned int vendor, 
unsigned int device, + struct pci_dev *from); + struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, struct pci_dev *from); @@ -660,7 +664,12 @@ static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn) { return NULL; } -static inline struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from) +static inline struct pci_dev *pci_get_device(unsigned int vendor, + unsigned int device, struct pci_dev *from) +{ return NULL; } + +static inline struct pci_dev *pci_get_device_reverse(unsigned int vendor, + unsigned int device, struct pci_dev *from) { return NULL; } static inline struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device, diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index 84d88775185..ebd42a3710b 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -146,16 +146,16 @@ enum bitmap_state { /* the superblock at the front of the bitmap file -- little endian */ typedef struct bitmap_super_s { - __u32 magic; /* 0 BITMAP_MAGIC */ - __u32 version; /* 4 the bitmap major for now, could change... */ - __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ - __u64 events; /* 24 event counter for the bitmap (1)*/ - __u64 events_cleared;/*32 event counter when last bit cleared (2) */ - __u64 sync_size; /* 40 the size of the md device's sync range(3) */ - __u32 state; /* 48 bitmap state information */ - __u32 chunksize; /* 52 the bitmap chunk size in bytes */ - __u32 daemon_sleep; /* 56 seconds between disk flushes */ - __u32 write_behind; /* 60 number of outstanding write-behind writes */ + __le32 magic; /* 0 BITMAP_MAGIC */ + __le32 version; /* 4 the bitmap major for now, could change... */ + __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ + __le64 events; /* 24 event counter for the bitmap (1)*/ + __le64 events_cleared;/*32 event counter when last bit cleared (2) */ + __le64 sync_size; /* 40 the size of the md device's sync range(3) */ + __le32 state; /* 48 bitmap state information */ + __le32 chunksize; /* 52 the bitmap chunk size in bytes */ + __le32 daemon_sleep; /* 56 seconds between disk flushes */ + __le32 write_behind; /* 60 number of outstanding write-behind writes */ __u8 pad[256 - 64]; /* set to zero */ } bitmap_super_t; diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index b6ebc69bae5..3f2cd98c508 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -206,52 +206,52 @@ static inline __u64 md_event(mdp_super_t *sb) { */ struct mdp_superblock_1 { /* constant array information - 128 bytes */ - __u32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ - __u32 major_version; /* 1 */ - __u32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ - __u32 pad0; /* always set to 0 when writing */ + __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ + __le32 major_version; /* 1 */ + __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ + __le32 pad0; /* always set to 0 when writing */ __u8 set_uuid[16]; /* user-space generated. 
*/ char set_name[32]; /* set and interpreted by user-space */ - __u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ - __u32 layout; /* only for raid5 and raid10 currently */ - __u64 size; /* used size of component devices, in 512byte sectors */ + __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ + __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ + __le32 layout; /* only for raid5 and raid10 currently */ + __le64 size; /* used size of component devices, in 512byte sectors */ - __u32 chunksize; /* in 512byte sectors */ - __u32 raid_disks; - __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts + __le32 chunksize; /* in 512byte sectors */ + __le32 raid_disks; + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts * NOTE: signed, so bitmap can be before superblock * only meaningful of feature_map[0] is set. */ /* These are only valid with feature bit '4' */ - __u32 new_level; /* new level we are reshaping to */ - __u64 reshape_position; /* next address in array-space for reshape */ - __u32 delta_disks; /* change in number of raid_disks */ - __u32 new_layout; /* new layout */ - __u32 new_chunk; /* new chunk size (bytes) */ + __le32 new_level; /* new level we are reshaping to */ + __le64 reshape_position; /* next address in array-space for reshape */ + __le32 delta_disks; /* change in number of raid_disks */ + __le32 new_layout; /* new layout */ + __le32 new_chunk; /* new chunk size (bytes) */ __u8 pad1[128-124]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ - __u64 data_offset; /* sector start of data, often 0 */ - __u64 data_size; /* sectors in this device that can be used for data */ - __u64 super_offset; /* sector start of this superblock */ - __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ - __u32 dev_number; /* permanent identifier of this device - not role in raid */ - __u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ + __le64 data_offset; /* sector start of data, often 0 */ + __le64 data_size; /* sectors in this device that can be used for data */ + __le64 super_offset; /* sector start of this superblock */ + __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + __le32 dev_number; /* permanent identifier of this device - not role in raid */ + __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ __u8 devflags; /* per-device flags. 
Only one defined...*/ #define WriteMostly1 1 /* mask for writemostly flag in above */ __u8 pad2[64-57]; /* set to 0 when writing */ /* array state information - 64 bytes */ - __u64 utime; /* 40 bits second, 24 btes microseconds */ - __u64 events; /* incremented when superblock updated */ - __u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ - __u32 sb_csum; /* checksum upto devs[max_dev] */ - __u32 max_dev; /* size of devs[] array to consider */ + __le64 utime; /* 40 bits second, 24 btes microseconds */ + __le64 events; /* incremented when superblock updated */ + __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ + __le32 sb_csum; /* checksum upto devs[max_dev] */ + __le32 max_dev; /* size of devs[] array to consider */ __u8 pad3[64-32]; /* set to 0 when writing */ /* device state information. Indexed by dev_number. @@ -260,7 +260,7 @@ struct mdp_superblock_1 { * into the 'roles' value. If a device is spare or faulty, then it doesn't * have a meaningful role. */ - __u16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ + __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ }; /* feature_map bits */ diff --git a/mm/filemap.c b/mm/filemap.c index 8558732e85c..cb26e33fd0f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1884,11 +1884,10 @@ repeat: * if suid or (sgid and xgrp) * remove privs */ -int remove_suid(struct dentry *dentry) +int should_remove_suid(struct dentry *dentry) { mode_t mode = dentry->d_inode->i_mode; int kill = 0; - int result = 0; /* suid always must be killed */ if (unlikely(mode & S_ISUID)) @@ -1901,13 +1900,28 @@ int remove_suid(struct dentry *dentry) if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) kill |= ATTR_KILL_SGID; - if (unlikely(kill && !capable(CAP_FSETID))) { - struct iattr newattrs; + if (unlikely(kill && !capable(CAP_FSETID))) + return kill; - newattrs.ia_valid = ATTR_FORCE | kill; - result = notify_change(dentry, &newattrs); - } - return result; + return 0; +} + +int __remove_suid(struct dentry *dentry, int kill) +{ + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; + return notify_change(dentry, &newattrs); +} + +int remove_suid(struct dentry *dentry) +{ + int kill = should_remove_suid(dentry); + + if (unlikely(kill)) + return __remove_suid(dentry, kill); + + return 0; } EXPORT_SYMBOL(remove_suid); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebd425c2e2a..f5fc45472d5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1689,6 +1689,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, for (pfn = start_pfn; pfn < end_pfn; pfn++) { if (!early_pfn_valid(pfn)) continue; + if (!early_pfn_in_nid(pfn, nid)) + continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); init_page_count(page); diff --git a/mm/slab.c b/mm/slab.c index 266449d604b..84c631f3074 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3152,12 +3152,15 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) struct zone **z; void *obj = NULL; - for (z = zonelist->zones; *z && !obj; z++) + for (z = zonelist->zones; *z && !obj; z++) { + int nid = zone_to_nid(*z); + if (zone_idx(*z) <= ZONE_NORMAL && - cpuset_zone_allowed(*z, flags)) + cpuset_zone_allowed(*z, flags) && + cache->nodelists[nid]) obj = __cache_alloc_node(cache, - flags | __GFP_THISNODE, - zone_to_nid(*z)); + flags | __GFP_THISNODE, nid); + } return obj; } |
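Note: the inode_double_lock()/inode_double_unlock() helpers added in fs/inode.c above (and used by fs/splice.c) avoid ABBA deadlocks by always acquiring the two i_mutexes in a fixed order, lowest address first. The following is a minimal, stand-alone user-space sketch of that ordering rule only; it is illustrative, not kernel code, and the struct and function names are made up for the example.

/*
 * Illustrative user-space analogue of the address-ordered double lock:
 * whenever two locks must be held at once, take the lower-addressed one
 * first, so two threads locking the same pair can never deadlock.
 */
#include <pthread.h>
#include <stdio.h>

struct object {
	pthread_mutex_t lock;
	long value;
};

static void double_lock(struct object *a, struct object *b)
{
	if (a == b) {			/* same object: one lock is enough */
		pthread_mutex_lock(&a->lock);
		return;
	}
	if (a > b) {			/* order by address, lowest first */
		struct object *tmp = a;
		a = b;
		b = tmp;
	}
	pthread_mutex_lock(&a->lock);
	pthread_mutex_lock(&b->lock);
}

static void double_unlock(struct object *a, struct object *b)
{
	pthread_mutex_unlock(&a->lock);
	if (b != a)
		pthread_mutex_unlock(&b->lock);
}

int main(void)
{
	struct object x = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct object y = { PTHREAD_MUTEX_INITIALIZER, 2 };

	/* same acquisition order no matter how the arguments are passed */
	double_lock(&x, &y);
	x.value += y.value;
	double_unlock(&x, &y);
	printf("%ld\n", x.value);
	return 0;
}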