aboutsummaryrefslogtreecommitdiff
path: root/block/genhd.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-10-12 12:35:23 +0200
committerIngo Molnar <mingo@elte.hu>2008-10-12 12:37:32 +0200
commit365d46dc9be9b3c833990a06f3994b1987eda578 (patch)
tree9397d1304144a288411f2118707f44ff5e862fa6 /block/genhd.c
parent5dc64a3442b98eaa0e3730c35fcf00cf962a93e7 (diff)
parentfd048088306656824958e7783ffcee27e241b361 (diff)
Merge branch 'linus' into x86/xen
Conflicts: arch/x86/kernel/cpu/common.c arch/x86/kernel/process_64.c arch/x86/xen/enlighten.c
Diffstat (limited to 'block/genhd.c')
-rw-r--r--block/genhd.c965
1 files changed, 634 insertions, 331 deletions
diff --git a/block/genhd.c b/block/genhd.c
index e0ce23ac2ec..4cd3433c99a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -16,6 +16,7 @@
#include <linux/kobj_map.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
+#include <linux/idr.h>
#include "blk.h"
@@ -24,8 +25,194 @@ static DEFINE_MUTEX(block_class_lock);
struct kobject *block_depr;
#endif
+/* for extended dynamic devt allocation, currently only one major is used */
+#define MAX_EXT_DEVT (1 << MINORBITS)
+
+/* For extended devt allocation. ext_devt_mutex prevents look up
+ * results from going away underneath its user.
+ */
+static DEFINE_MUTEX(ext_devt_mutex);
+static DEFINE_IDR(ext_devt_idr);
+
static struct device_type disk_type;
+/**
+ * disk_get_part - get partition
+ * @disk: disk to look partition from
+ * @partno: partition number
+ *
+ * Look for partition @partno from @disk. If found, increment
+ * reference count and return it.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * Pointer to the found partition on success, NULL if not found.
+ */
+struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
+{
+ struct hd_struct *part = NULL;
+ struct disk_part_tbl *ptbl;
+
+ if (unlikely(partno < 0))
+ return NULL;
+
+ rcu_read_lock();
+
+ ptbl = rcu_dereference(disk->part_tbl);
+ if (likely(partno < ptbl->len)) {
+ part = rcu_dereference(ptbl->part[partno]);
+ if (part)
+ get_device(part_to_dev(part));
+ }
+
+ rcu_read_unlock();
+
+ return part;
+}
+EXPORT_SYMBOL_GPL(disk_get_part);
+
+/**
+ * disk_part_iter_init - initialize partition iterator
+ * @piter: iterator to initialize
+ * @disk: disk to iterate over
+ * @flags: DISK_PITER_* flags
+ *
+ * Initialize @piter so that it iterates over partitions of @disk.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
+ unsigned int flags)
+{
+ struct disk_part_tbl *ptbl;
+
+ rcu_read_lock();
+ ptbl = rcu_dereference(disk->part_tbl);
+
+ piter->disk = disk;
+ piter->part = NULL;
+
+ if (flags & DISK_PITER_REVERSE)
+ piter->idx = ptbl->len - 1;
+ else if (flags & DISK_PITER_INCL_PART0)
+ piter->idx = 0;
+ else
+ piter->idx = 1;
+
+ piter->flags = flags;
+
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_init);
+
+/**
+ * disk_part_iter_next - proceed iterator to the next partition and return it
+ * @piter: iterator of interest
+ *
+ * Proceed @piter to the next partition and return it.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
+{
+ struct disk_part_tbl *ptbl;
+ int inc, end;
+
+ /* put the last partition */
+ disk_put_part(piter->part);
+ piter->part = NULL;
+
+ /* get part_tbl */
+ rcu_read_lock();
+ ptbl = rcu_dereference(piter->disk->part_tbl);
+
+ /* determine iteration parameters */
+ if (piter->flags & DISK_PITER_REVERSE) {
+ inc = -1;
+ if (piter->flags & DISK_PITER_INCL_PART0)
+ end = -1;
+ else
+ end = 0;
+ } else {
+ inc = 1;
+ end = ptbl->len;
+ }
+
+ /* iterate to the next partition */
+ for (; piter->idx != end; piter->idx += inc) {
+ struct hd_struct *part;
+
+ part = rcu_dereference(ptbl->part[piter->idx]);
+ if (!part)
+ continue;
+ if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
+ continue;
+
+ get_device(part_to_dev(part));
+ piter->part = part;
+ piter->idx += inc;
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return piter->part;
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_next);
+
+/**
+ * disk_part_iter_exit - finish up partition iteration
+ * @piter: iter of interest
+ *
+ * Called when iteration is over. Cleans up @piter.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void disk_part_iter_exit(struct disk_part_iter *piter)
+{
+ disk_put_part(piter->part);
+ piter->part = NULL;
+}
+EXPORT_SYMBOL_GPL(disk_part_iter_exit);
+
+/**
+ * disk_map_sector_rcu - map sector to partition
+ * @disk: gendisk of interest
+ * @sector: sector to map
+ *
+ * Find out which partition @sector maps to on @disk. This is
+ * primarily used for stats accounting.
+ *
+ * CONTEXT:
+ * RCU read locked. The returned partition pointer is valid only
+ * while preemption is disabled.
+ *
+ * RETURNS:
+ * Found partition on success, part0 is returned if no partition matches
+ */
+struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
+{
+ struct disk_part_tbl *ptbl;
+ int i;
+
+ ptbl = rcu_dereference(disk->part_tbl);
+
+ for (i = 1; i < ptbl->len; i++) {
+ struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+
+ if (part && part->start_sect <= sector &&
+ sector < part->start_sect + part->nr_sects)
+ return part;
+ }
+ return &disk->part0;
+}
+EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
+
/*
* Can be deleted altogether. Later.
*
@@ -43,14 +230,14 @@ static inline int major_to_index(int major)
}
#ifdef CONFIG_PROC_FS
-void blkdev_show(struct seq_file *f, off_t offset)
+void blkdev_show(struct seq_file *seqf, off_t offset)
{
struct blk_major_name *dp;
if (offset < BLKDEV_MAJOR_HASH_SIZE) {
mutex_lock(&block_class_lock);
for (dp = major_names[offset]; dp; dp = dp->next)
- seq_printf(f, "%3d %s\n", dp->major, dp->name);
+ seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
mutex_unlock(&block_class_lock);
}
}
@@ -136,6 +323,118 @@ EXPORT_SYMBOL(unregister_blkdev);
static struct kobj_map *bdev_map;
+/**
+ * blk_mangle_minor - scatter minor numbers apart
+ * @minor: minor number to mangle
+ *
+ * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
+ * is enabled. Mangling twice gives the original value.
+ *
+ * RETURNS:
+ * Mangled value.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+static int blk_mangle_minor(int minor)
+{
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+ int i;
+
+ for (i = 0; i < MINORBITS / 2; i++) {
+ int low = minor & (1 << i);
+ int high = minor & (1 << (MINORBITS - 1 - i));
+ int distance = MINORBITS - 1 - 2 * i;
+
+ minor ^= low | high; /* clear both bits */
+ low <<= distance; /* swap the positions */
+ high >>= distance;
+ minor |= low | high; /* and set */
+ }
+#endif
+ return minor;
+}
+
+/**
+ * blk_alloc_devt - allocate a dev_t for a partition
+ * @part: partition to allocate dev_t for
+ * @gfp_mask: memory allocation flag
+ * @devt: out parameter for resulting dev_t
+ *
+ * Allocate a dev_t for block device.
+ *
+ * RETURNS:
+ * 0 on success, allocated dev_t is returned in *@devt. -errno on
+ * failure.
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
+{
+ struct gendisk *disk = part_to_disk(part);
+ int idx, rc;
+
+ /* in consecutive minor range? */
+ if (part->partno < disk->minors) {
+ *devt = MKDEV(disk->major, disk->first_minor + part->partno);
+ return 0;
+ }
+
+ /* allocate ext devt */
+ do {
+ if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
+ return -ENOMEM;
+ rc = idr_get_new(&ext_devt_idr, part, &idx);
+ } while (rc == -EAGAIN);
+
+ if (rc)
+ return rc;
+
+ if (idx > MAX_EXT_DEVT) {
+ idr_remove(&ext_devt_idr, idx);
+ return -EBUSY;
+ }
+
+ *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
+ return 0;
+}
+
+/**
+ * blk_free_devt - free a dev_t
+ * @devt: dev_t to free
+ *
+ * Free @devt which was allocated using blk_alloc_devt().
+ *
+ * CONTEXT:
+ * Might sleep.
+ */
+void blk_free_devt(dev_t devt)
+{
+ might_sleep();
+
+ if (devt == MKDEV(0, 0))
+ return;
+
+ if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
+ mutex_lock(&ext_devt_mutex);
+ idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
+ mutex_unlock(&ext_devt_mutex);
+ }
+}
+
+static char *bdevt_str(dev_t devt, char *buf)
+{
+ if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
+ char tbuf[BDEVT_SIZE];
+ snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
+ snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
+ } else
+ snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
+
+ return buf;
+}
+
/*
* Register device numbers dev..(dev+range-1)
* range must be nonzero
@@ -157,11 +456,11 @@ void blk_unregister_region(dev_t devt, unsigned long range)
EXPORT_SYMBOL(blk_unregister_region);
-static struct kobject *exact_match(dev_t devt, int *part, void *data)
+static struct kobject *exact_match(dev_t devt, int *partno, void *data)
{
struct gendisk *p = data;
- return &p->dev.kobj;
+ return &disk_to_dev(p)->kobj;
}
static int exact_lock(dev_t devt, void *data)
@@ -179,21 +478,46 @@ static int exact_lock(dev_t devt, void *data)
*
* This function registers the partitioning information in @disk
* with the kernel.
+ *
+ * FIXME: error handling
*/
void add_disk(struct gendisk *disk)
{
struct backing_dev_info *bdi;
+ dev_t devt;
int retval;
+ /* minors == 0 indicates to use ext devt from part0 and should
+ * be accompanied with EXT_DEVT flag. Make sure all
+ * parameters make sense.
+ */
+ WARN_ON(disk->minors && !(disk->major || disk->first_minor));
+ WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
+
disk->flags |= GENHD_FL_UP;
- blk_register_region(MKDEV(disk->major, disk->first_minor),
- disk->minors, NULL, exact_match, exact_lock, disk);
+
+ retval = blk_alloc_devt(&disk->part0, &devt);
+ if (retval) {
+ WARN_ON(1);
+ return;
+ }
+ disk_to_dev(disk)->devt = devt;
+
+ /* ->major and ->first_minor aren't supposed to be
+ * dereferenced from here on, but set them just in case.
+ */
+ disk->major = MAJOR(devt);
+ disk->first_minor = MINOR(devt);
+
+ blk_register_region(disk_devt(disk), disk->minors, NULL,
+ exact_match, exact_lock, disk);
register_disk(disk);
blk_register_queue(disk);
bdi = &disk->queue->backing_dev_info;
- bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
- retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi");
+ bdi_register_dev(bdi, disk_devt(disk));
+ retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
+ "bdi");
WARN_ON(retval);
}
@@ -202,78 +526,71 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
void unlink_gendisk(struct gendisk *disk)
{
- sysfs_remove_link(&disk->dev.kobj, "bdi");
+ sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
bdi_unregister(&disk->queue->backing_dev_info);
blk_unregister_queue(disk);
- blk_unregister_region(MKDEV(disk->major, disk->first_minor),
- disk->minors);
+ blk_unregister_region(disk_devt(disk), disk->minors);
}
/**
* get_gendisk - get partitioning information for a given device
- * @dev: device to get partitioning information for
+ * @devt: device to get partitioning information for
+ * @part: returned partition index
*
* This function gets the structure containing partitioning
- * information for the given device @dev.
+ * information for the given device @devt.
*/
-struct gendisk *get_gendisk(dev_t devt, int *part)
+struct gendisk *get_gendisk(dev_t devt, int *partno)
{
- struct kobject *kobj = kobj_lookup(bdev_map, devt, part);
- struct device *dev = kobj_to_dev(kobj);
+ struct gendisk *disk = NULL;
+
+ if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
+ struct kobject *kobj;
+
+ kobj = kobj_lookup(bdev_map, devt, partno);
+ if (kobj)
+ disk = dev_to_disk(kobj_to_dev(kobj));
+ } else {
+ struct hd_struct *part;
- return kobj ? dev_to_disk(dev) : NULL;
+ mutex_lock(&ext_devt_mutex);
+ part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
+ if (part && get_disk(part_to_disk(part))) {
+ *partno = part->partno;
+ disk = part_to_disk(part);
+ }
+ mutex_unlock(&ext_devt_mutex);
+ }
+
+ return disk;
}
-/*
- * print a partitions - intended for places where the root filesystem can't be
- * mounted and thus to give the victim some idea of what went wrong
+/**
+ * bdget_disk - do bdget() by gendisk and partition number
+ * @disk: gendisk of interest
+ * @partno: partition number
+ *
+ * Find partition @partno from @disk, do bdget() on it.
+ *
+ * CONTEXT:
+ * Don't care.
+ *
+ * RETURNS:
+ * Resulting block_device on success, NULL on failure.
*/
-static int printk_partition(struct device *dev, void *data)
+struct block_device *bdget_disk(struct gendisk *disk, int partno)
{
- struct gendisk *sgp;
- char buf[BDEVNAME_SIZE];
- int n;
-
- if (dev->type != &disk_type)
- goto exit;
+ struct hd_struct *part;
+ struct block_device *bdev = NULL;
- sgp = dev_to_disk(dev);
- /*
- * Don't show empty devices or things that have been surpressed
- */
- if (get_capacity(sgp) == 0 ||
- (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
- goto exit;
+ part = disk_get_part(disk, partno);
+ if (part)
+ bdev = bdget(part_devt(part));
+ disk_put_part(part);
- /*
- * Note, unlike /proc/partitions, I am showing the numbers in
- * hex - the same format as the root= option takes.
- */
- printk("%02x%02x %10llu %s",
- sgp->major, sgp->first_minor,
- (unsigned long long)get_capacity(sgp) >> 1,
- disk_name(sgp, 0, buf));
- if (sgp->driverfs_dev != NULL &&
- sgp->driverfs_dev->driver != NULL)
- printk(" driver: %s\n",
- sgp->driverfs_dev->driver->name);
- else
- printk(" (driver?)\n");
-
- /* now show the partitions */
- for (n = 0; n < sgp->minors - 1; ++n) {
- if (sgp->part[n] == NULL)
- goto exit;
- if (sgp->part[n]->nr_sects == 0)
- goto exit;
- printk(" %02x%02x %10llu %s\n",
- sgp->major, n + 1 + sgp->first_minor,
- (unsigned long long)sgp->part[n]->nr_sects >> 1,
- disk_name(sgp, n + 1, buf));
- }
-exit:
- return 0;
+ return bdev;
}
+EXPORT_SYMBOL(bdget_disk);
/*
* print a full list of all partitions - intended for places where the root
@@ -282,120 +599,145 @@ exit:
*/
void __init printk_all_partitions(void)
{
- mutex_lock(&block_class_lock);
- class_for_each_device(&block_class, NULL, NULL, printk_partition);
- mutex_unlock(&block_class_lock);
+ struct class_dev_iter iter;
+ struct device *dev;
+
+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+ while ((dev = class_dev_iter_next(&iter))) {
+ struct gendisk *disk = dev_to_disk(dev);
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+ char name_buf[BDEVNAME_SIZE];
+ char devt_buf[BDEVT_SIZE];
+
+ /*
+ * Don't show empty devices or things that have been
+ * surpressed
+ */
+ if (get_capacity(disk) == 0 ||
+ (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
+ continue;
+
+ /*
+ * Note, unlike /proc/partitions, I am showing the
+ * numbers in hex - the same format as the root=
+ * option takes.
+ */
+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+ while ((part = disk_part_iter_next(&piter))) {
+ bool is_part0 = part == &disk->part0;
+
+ printk("%s%s %10llu %s", is_part0 ? "" : " ",
+ bdevt_str(part_devt(part), devt_buf),
+ (unsigned long long)part->nr_sects >> 1,
+ disk_name(disk, part->partno, name_buf));
+ if (is_part0) {
+ if (disk->driverfs_dev != NULL &&
+ disk->driverfs_dev->driver != NULL)
+ printk(" driver: %s\n",
+ disk->driverfs_dev->driver->name);
+ else
+ printk(" (driver?)\n");
+ } else
+ printk("\n");
+ }
+ disk_part_iter_exit(&piter);
+ }
+ class_dev_iter_exit(&iter);
}
#ifdef CONFIG_PROC_FS
/* iterator */
-static int find_start(struct device *dev, void *data)
+static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
- loff_t *k = data;
+ loff_t skip = *pos;
+ struct class_dev_iter *iter;
+ struct device *dev;
- if (dev->type != &disk_type)
- return 0;
- if (!*k)
- return 1;
- (*k)--;
- return 0;
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+
+ seqf->private = iter;
+ class_dev_iter_init(iter, &block_class, NULL, &disk_type);
+ do {
+ dev = class_dev_iter_next(iter);
+ if (!dev)
+ return NULL;
+ } while (skip--);
+
+ return dev_to_disk(dev);
}
-static void *part_start(struct seq_file *part, loff_t *pos)
+static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
struct device *dev;
- loff_t k = *pos;
-
- if (!k)
- part->private = (void *)1LU; /* tell show to print header */
- mutex_lock(&block_class_lock);
- dev = class_find_device(&block_class, NULL, &k, find_start);
- if (dev) {
- put_device(dev);
+ (*pos)++;
+ dev = class_dev_iter_next(seqf->private);
+ if (dev)
return dev_to_disk(dev);
- }
+
return NULL;
}
-static int find_next(struct device *dev, void *data)
+static void disk_seqf_stop(struct seq_file *seqf, void *v)
{
- if (dev->type == &disk_type)
- return 1;
- return 0;
-}
+ struct class_dev_iter *iter = seqf->private;
-static void *part_next(struct seq_file *part, void *v, loff_t *pos)
-{
- struct gendisk *gp = v;
- struct device *dev;
- ++*pos;
- dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
- if (dev) {
- put_device(dev);
- return dev_to_disk(dev);
+ /* stop is called even after start failed :-( */
+ if (iter) {
+ class_dev_iter_exit(iter);
+ kfree(iter);
}
- return NULL;
}
-static void part_stop(struct seq_file *part, void *v)
+static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
- mutex_unlock(&block_class_lock);
+ static void *p;
+
+ p = disk_seqf_start(seqf, pos);
+ if (!IS_ERR(p) && p && !*pos)
+ seq_puts(seqf, "major minor #blocks name\n\n");
+ return p;
}
-static int show_partition(struct seq_file *part, void *v)
+static int show_partition(struct seq_file *seqf, void *v)
{
struct gendisk *sgp = v;
- int n;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
char buf[BDEVNAME_SIZE];
- /*
- * Print header if start told us to do. This is to preserve
- * the original behavior of not printing header if no
- * partition exists. This hackery will be removed later with
- * class iteration clean up.
- */
- if (part->private) {
- seq_puts(part, "major minor #blocks name\n\n");
- part->private = NULL;
- }
-
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) ||
- (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE)))
+ if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
+ (sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
return 0;
/* show the full disk and all non-0 size partitions of it */
- seq_printf(part, "%4d %4d %10llu %s\n",
- sgp->major, sgp->first_minor,
- (unsigned long long)get_capacity(sgp) >> 1,
- disk_name(sgp, 0, buf));
- for (n = 0; n < sgp->minors - 1; n++) {
- if (!sgp->part[n])
- continue;
- if (sgp->part[n]->nr_sects == 0)
- continue;
- seq_printf(part, "%4d %4d %10llu %s\n",
- sgp->major, n + 1 + sgp->first_minor,
- (unsigned long long)sgp->part[n]->nr_sects >> 1 ,
- disk_name(sgp, n + 1, buf));
- }
+ disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
+ while ((part = disk_part_iter_next(&piter)))
+ seq_printf(seqf, "%4d %7d %10llu %s\n",
+ MAJOR(part_devt(part)), MINOR(part_devt(part)),
+ (unsigned long long)part->nr_sects >> 1,
+ disk_name(sgp, part->partno, buf));
+ disk_part_iter_exit(&piter);
return 0;
}
const struct seq_operations partitions_op = {
- .start = part_start,
- .next = part_next,
- .stop = part_stop,
+ .start = show_partition_start,
+ .next = disk_seqf_next,
+ .stop = disk_seqf_stop,
.show = show_partition
};
#endif
-static struct kobject *base_probe(dev_t devt, int *part, void *data)
+static struct kobject *base_probe(dev_t devt, int *partno, void *data)
{
if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
/* Make old-style 2.4 aliases work */
@@ -431,29 +773,29 @@ static ssize_t disk_range_show(struct device *dev,
return sprintf(buf, "%d\n", disk->minors);
}
-static ssize_t disk_removable_show(struct device *dev,
+static ssize_t disk_ext_range_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
- return sprintf(buf, "%d\n",
- (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
+ return sprintf(buf, "%d\n", disk_max_parts(disk));
}
-static ssize_t disk_ro_show(struct device *dev,
+static ssize_t disk_removable_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
- return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
+ return sprintf(buf, "%d\n",
+ (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
}
-static ssize_t disk_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t disk_ro_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);
- return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk));
+ return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}
static ssize_t disk_capability_show(struct device *dev,
@@ -464,73 +806,26 @@ static ssize_t disk_capability_show(struct device *dev,
return sprintf(buf, "%x\n", disk->flags);
}
-static ssize_t disk_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct gendisk *disk = dev_to_disk(dev);
-
- preempt_disable();
- disk_round_stats(disk);
- preempt_enable();
- return sprintf(buf,
- "%8lu %8lu %8llu %8u "
- "%8lu %8lu %8llu %8u "
- "%8u %8u %8u"
- "\n",
- disk_stat_read(disk, ios[READ]),
- disk_stat_read(disk, merges[READ]),
- (unsigned long long)disk_stat_read(disk, sectors[READ]),
- jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
- disk_stat_read(disk, ios[WRITE]),
- disk_stat_read(disk, merges[WRITE]),
- (unsigned long long)disk_stat_read(disk, sectors[WRITE]),
- jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
- disk->in_flight,
- jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
- jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
-}
-
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-static ssize_t disk_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct gendisk *disk = dev_to_disk(dev);
-
- return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0);
-}
-
-static ssize_t disk_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct gendisk *disk = dev_to_disk(dev);
- int i;
-
- if (count > 0 && sscanf(buf, "%d", &i) > 0) {
- if (i == 0)
- disk->flags &= ~GENHD_FL_FAIL;
- else
- disk->flags |= GENHD_FL_FAIL;
- }
-
- return count;
-}
-
-#endif
-
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
+static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
+static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
- __ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store);
+ __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+#endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+static struct device_attribute dev_attr_fail_timeout =
+ __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show,
+ part_timeout_store);
#endif
static struct attribute *disk_attrs[] = {
&dev_attr_range.attr,
+ &dev_attr_ext_range.attr,
&dev_attr_removable.attr,
&dev_attr_ro.attr,
&dev_attr_size.attr,
@@ -539,6 +834,9 @@ static struct attribute *disk_attrs[] = {
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+ &dev_attr_fail_timeout.attr,
+#endif
NULL
};
@@ -551,13 +849,87 @@ static struct attribute_group *disk_attr_groups[] = {
NULL
};
+static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
+{
+ struct disk_part_tbl *ptbl =
+ container_of(head, struct disk_part_tbl, rcu_head);
+
+ kfree(ptbl);
+}
+
+/**
+ * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
+ * @disk: disk to replace part_tbl for
+ * @new_ptbl: new part_tbl to install
+ *
+ * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
+ * original ptbl is freed using RCU callback.
+ *
+ * LOCKING:
+ * Matching bd_mutx locked.
+ */
+static void disk_replace_part_tbl(struct gendisk *disk,
+ struct disk_part_tbl *new_ptbl)
+{
+ struct disk_part_tbl *old_ptbl = disk->part_tbl;
+
+ rcu_assign_pointer(disk->part_tbl, new_ptbl);
+ if (old_ptbl)
+ call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+}
+
+/**
+ * disk_expand_part_tbl - expand disk->part_tbl
+ * @disk: disk to expand part_tbl for
+ * @partno: expand such that this partno can fit in
+ *
+ * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl
+ * uses RCU to allow unlocked dereferencing for stats and other stuff.
+ *
+ * LOCKING:
+ * Matching bd_mutex locked, might sleep.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int disk_expand_part_tbl(struct gendisk *disk, int partno)
+{
+ struct disk_part_tbl *old_ptbl = disk->part_tbl;
+ struct disk_part_tbl *new_ptbl;
+ int len = old_ptbl ? old_ptbl->len : 0;
+ int target = partno + 1;
+ size_t size;
+ int i;
+
+ /* disk_max_parts() is zero during initialization, ignore if so */
+ if (disk_max_parts(disk) && target > disk_max_parts(disk))
+ return -EINVAL;
+
+ if (target <= len)
+ return 0;
+
+ size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
+ new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
+ if (!new_ptbl)
+ return -ENOMEM;
+
+ INIT_RCU_HEAD(&new_ptbl->rcu_head);
+ new_ptbl->len = target;
+
+ for (i = 0; i < len; i++)
+ rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
+
+ disk_replace_part_tbl(disk, new_ptbl);
+ return 0;
+}
+
static void disk_release(struct device *dev)
{
struct gendisk *disk = dev_to_disk(dev);
kfree(disk->random);
- kfree(disk->part);
- free_disk_stats(disk);
+ disk_replace_part_tbl(disk, NULL);
+ free_part_stats(&disk->part0);
kfree(disk);
}
struct class block_class = {
@@ -578,83 +950,31 @@ static struct device_type disk_type = {
* The output looks suspiciously like /proc/partitions with a bunch of
* extra fields.
*/
-
-static void *diskstats_start(struct seq_file *part, loff_t *pos)
-{
- struct device *dev;
- loff_t k = *pos;
-
- mutex_lock(&block_class_lock);
- dev = class_find_device(&block_class, NULL, &k, find_start);
- if (dev) {
- put_device(dev);
- return dev_to_disk(dev);
- }
- return NULL;
-}
-
-static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
-{
- struct gendisk *gp = v;
- struct device *dev;
-
- ++*pos;
- dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
- if (dev) {
- put_device(dev);
- return dev_to_disk(dev);
- }
- return NULL;
-}
-
-static void diskstats_stop(struct seq_file *part, void *v)
-{
- mutex_unlock(&block_class_lock);
-}
-
-static int diskstats_show(struct seq_file *s, void *v)
+static int diskstats_show(struct seq_file *seqf, void *v)
{
struct gendisk *gp = v;
+ struct disk_part_iter piter;
+ struct hd_struct *hd;
char buf[BDEVNAME_SIZE];
- int n = 0;
+ int cpu;
/*
- if (&gp->dev.kobj.entry == block_class.devices.next)
- seq_puts(s, "major minor name"
+ if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
+ seq_puts(seqf, "major minor name"
" rio rmerge rsect ruse wio wmerge "
"wsect wuse running use aveq"
"\n\n");
*/
- preempt_disable();
- disk_round_stats(gp);
- preempt_enable();
- seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
- gp->major, n + gp->first_minor, disk_name(gp, n, buf),
- disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]),
- (unsigned long long)disk_stat_read(gp, sectors[0]),
- jiffies_to_msecs(disk_stat_read(gp, ticks[0])),
- disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]),
- (unsigned long long)disk_stat_read(gp, sectors[1]),
- jiffies_to_msecs(disk_stat_read(gp, ticks[1])),
- gp->in_flight,
- jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
- jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
-
- /* now show all non-0 size partitions of it */
- for (n = 0; n < gp->minors - 1; n++) {
- struct hd_struct *hd = gp->part[n];
-
- if (!hd || !hd->nr_sects)
- continue;
-
- preempt_disable();
- part_round_stats(hd);
- preempt_enable();
- seq_printf(s, "%4d %4d %s %lu %lu %llu "
+ disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
+ while ((hd = disk_part_iter_next(&piter))) {
+ cpu = part_stat_lock();
+ part_round_stats(cpu, hd);
+ part_stat_unlock();
+ seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
"%u %lu %lu %llu %u %u %u %u\n",
- gp->major, n + gp->first_minor + 1,
- disk_name(gp, n + 1, buf),
+ MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
+ disk_name(gp, hd->partno, buf),
part_stat_read(hd, ios[0]),
part_stat_read(hd, merges[0]),
(unsigned long long)part_stat_read(hd, sectors[0]),
@@ -668,14 +988,15 @@ static int diskstats_show(struct seq_file *s, void *v)
jiffies_to_msecs(part_stat_read(hd, time_in_queue))
);
}
+ disk_part_iter_exit(&piter);
return 0;
}
const struct seq_operations diskstats_op = {
- .start = diskstats_start,
- .next = diskstats_next,
- .stop = diskstats_stop,
+ .start = disk_seqf_start,
+ .next = disk_seqf_next,
+ .stop = disk_seqf_stop,
.show = diskstats_show
};
#endif /* CONFIG_PROC_FS */
@@ -690,7 +1011,7 @@ static void media_change_notify_thread(struct work_struct *work)
* set enviroment vars to indicate which event this is for
* so that user space will know to go check the media status.
*/
- kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp);
+ kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
put_device(gd->driverfs_dev);
}
@@ -703,42 +1024,29 @@ void genhd_media_change_notify(struct gendisk *disk)
EXPORT_SYMBOL_GPL(genhd_media_change_notify);
#endif /* 0 */
-struct find_block {
- const char *name;
- int part;
-};
-
-static int match_id(struct device *dev, void *data)
+dev_t blk_lookup_devt(const char *name, int partno)
{
- struct find_block *find = data;
+ dev_t devt = MKDEV(0, 0);
+ struct class_dev_iter iter;
+ struct device *dev;
- if (dev->type != &disk_type)
- return 0;
- if (strcmp(dev->bus_id, find->name) == 0) {
+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+ while ((dev = class_dev_iter_next(&iter))) {
struct gendisk *disk = dev_to_disk(dev);
- if (find->part < disk->minors)
- return 1;
- }
- return 0;
-}
+ struct hd_struct *part;
-dev_t blk_lookup_devt(const char *name, int part)
-{
- struct device *dev;
- dev_t devt = MKDEV(0, 0);
- struct find_block find;
+ if (strcmp(dev->bus_id, name))
+ continue;
- mutex_lock(&block_class_lock);
- find.name = name;
- find.part = part;
- dev = class_find_device(&block_class, NULL, &find, match_id);
- if (dev) {
- put_device(dev);
- devt = MKDEV(MAJOR(dev->devt),
- MINOR(dev->devt) + part);
+ part = disk_get_part(disk, partno);
+ if (part) {
+ devt = part_devt(part);
+ disk_put_part(part);
+ break;
+ }
+ disk_put_part(part);
}
- mutex_unlock(&block_class_lock);
-
+ class_dev_iter_exit(&iter);
return devt;
}
EXPORT_SYMBOL(blk_lookup_devt);
@@ -747,6 +1055,7 @@ struct gendisk *alloc_disk(int minors)
{
return alloc_disk_node(minors, -1);
}
+EXPORT_SYMBOL(alloc_disk);
struct gendisk *alloc_disk_node(int minors, int node_id)
{
@@ -755,32 +1064,28 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
disk = kmalloc_node(sizeof(struct gendisk),
GFP_KERNEL | __GFP_ZERO, node_id);
if (disk) {
- if (!init_disk_stats(disk)) {
+ if (!init_part_stats(&disk->part0)) {
kfree(disk);
return NULL;
}
- if (minors > 1) {
- int size = (minors - 1) * sizeof(struct hd_struct *);
- disk->part = kmalloc_node(size,
- GFP_KERNEL | __GFP_ZERO, node_id);
- if (!disk->part) {
- free_disk_stats(disk);
- kfree(disk);
- return NULL;
- }
+ if (disk_expand_part_tbl(disk, 0)) {
+ free_part_stats(&disk->part0);
+ kfree(disk);
+ return NULL;
}
+ disk->part_tbl->part[0] = &disk->part0;
+
disk->minors = minors;
rand_initialize_disk(disk);
- disk->dev.class = &block_class;
- disk->dev.type = &disk_type;
- device_initialize(&disk->dev);
+ disk_to_dev(disk)->class = &block_class;
+ disk_to_dev(disk)->type = &disk_type;
+ device_initialize(disk_to_dev(disk));
INIT_WORK(&disk->async_notify,
media_change_notify_thread);
+ disk->node_id = node_id;
}
return disk;
}
-
-EXPORT_SYMBOL(alloc_disk);
EXPORT_SYMBOL(alloc_disk_node);
struct kobject *get_disk(struct gendisk *disk)
@@ -793,7 +1098,7 @@ struct kobject *get_disk(struct gendisk *disk)
owner = disk->fops->owner;
if (owner && !try_module_get(owner))
return NULL;
- kobj = kobject_get(&disk->dev.kobj);
+ kobj = kobject_get(&disk_to_dev(disk)->kobj);
if (kobj == NULL) {
module_put(owner);
return NULL;
@@ -807,27 +1112,28 @@ EXPORT_SYMBOL(get_disk);
void put_disk(struct gendisk *disk)
{
if (disk)
- kobject_put(&disk->dev.kobj);
+ kobject_put(&disk_to_dev(disk)->kobj);
}
EXPORT_SYMBOL(put_disk);
void set_device_ro(struct block_device *bdev, int flag)
{
- if (bdev->bd_contains != bdev)
- bdev->bd_part->policy = flag;
- else
- bdev->bd_disk->policy = flag;
+ bdev->bd_part->policy = flag;
}
EXPORT_SYMBOL(set_device_ro);
void set_disk_ro(struct gendisk *disk, int flag)
{
- int i;
- disk->policy = flag;
- for (i = 0; i < disk->minors - 1; i++)
- if (disk->part[i]) disk->part[i]->policy = flag;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
+ while ((part = disk_part_iter_next(&piter)))
+ part->policy = flag;
+ disk_part_iter_exit(&piter);
}
EXPORT_SYMBOL(set_disk_ro);
@@ -836,18 +1142,15 @@ int bdev_read_only(struct block_device *bdev)
{
if (!bdev)
return 0;
- else if (bdev->bd_contains != bdev)
- return bdev->bd_part->policy;
- else
- return bdev->bd_disk->policy;
+ return bdev->bd_part->policy;
}
EXPORT_SYMBOL(bdev_read_only);
-int invalidate_partition(struct gendisk *disk, int index)
+int invalidate_partition(struct gendisk *disk, int partno)
{
int res = 0;
- struct block_device *bdev = bdget_disk(disk, index);
+ struct block_device *bdev = bdget_disk(disk, partno);
if (bdev) {
fsync_bdev(bdev);
res = __invalidate_device(bdev);