diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 16 | ||||
-rw-r--r-- | drivers/block/Makefile | 2 | ||||
-rw-r--r-- | drivers/block/aoe/aoe.h | 69 | ||||
-rw-r--r-- | drivers/block/aoe/aoeblk.c | 72 | ||||
-rw-r--r-- | drivers/block/aoe/aoechr.c | 93 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 742 | ||||
-rw-r--r-- | drivers/block/aoe/aoedev.c | 277 | ||||
-rw-r--r-- | drivers/block/aoe/aoemain.c | 2 | ||||
-rw-r--r-- | drivers/block/aoe/aoenet.c | 15 | ||||
-rw-r--r-- | drivers/block/ataflop.c | 16 | ||||
-rw-r--r-- | drivers/block/brd.c | 583 | ||||
-rw-r--r-- | drivers/block/cciss.c | 10 | ||||
-rw-r--r-- | drivers/block/loop.c | 8 | ||||
-rw-r--r-- | drivers/block/nbd.c | 10 | ||||
-rw-r--r-- | drivers/block/paride/pt.c | 2 | ||||
-rw-r--r-- | drivers/block/pktcdvd.c | 4 | ||||
-rw-r--r-- | drivers/block/rd.c | 536 | ||||
-rw-r--r-- | drivers/block/ub.c | 2 | ||||
-rw-r--r-- | drivers/block/xsysace.c | 6 |
19 files changed, 1495 insertions, 970 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 64e5148d82b..b6d230b3209 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -322,7 +322,7 @@ config BLK_DEV_UB If unsure, say N. config BLK_DEV_RAM - tristate "RAM disk support" + tristate "RAM block device support" ---help--- Saying Y here will allow you to use a portion of your RAM memory as a block device, so that you can make file systems on it, read and @@ -357,15 +357,15 @@ config BLK_DEV_RAM_SIZE The default value is 4096 kilobytes. Only change this if you know what you are doing. -config BLK_DEV_RAM_BLOCKSIZE - int "Default RAM disk block size (bytes)" +config BLK_DEV_XIP + bool "Support XIP filesystems on RAM block device" depends on BLK_DEV_RAM - default "1024" + default n help - The default value is 1024 bytes. PAGE_SIZE is a much more - efficient choice however. The default is kept to ensure initrd - setups function - apparently needed by the rd_load_image routine - that supposes the filesystem in the image uses a 1024 blocksize. + Support XIP filesystems (such as ext2 with XIP support on) on + top of block ram device. This will slightly enlarge the kernel, and + will prevent RAM block device backing store memory from being + allocated from highmem (only a problem for highmem systems). 
config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7691505a2e1..01c972415cb 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_AMIGA_FLOPPY) += amiflop.o obj-$(CONFIG_PS3_DISK) += ps3disk.o obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o -obj-$(CONFIG_BLK_DEV_RAM) += rd.o +obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o obj-$(CONFIG_BLK_DEV_PS2) += ps2esdi.o obj-$(CONFIG_BLK_DEV_XD) += xd.o diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 07f02f855ab..280e71ee744 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,5 +1,5 @@ -/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ -#define VERSION "32" +/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ +#define VERSION "47" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" @@ -76,10 +76,8 @@ enum { DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */ DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */ - DEVFL_PAUSE = (1<<5), + DEVFL_KICKME = (1<<5), /* slow polling network card catch */ DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ - DEVFL_MAXBCNT = (1<<7), /* d->maxbcnt is not changeable */ - DEVFL_KICKME = (1<<8), BUFFL_FAIL = 1, }; @@ -88,17 +86,25 @@ enum { DEFAULTBCNT = 2 * 512, /* 2 sectors */ NPERSHELF = 16, /* number of slots per shelf address */ FREETAG = -1, - MIN_BUFS = 8, + MIN_BUFS = 16, + NTARGETS = 8, + NAOEIFS = 8, + NSKBPOOLMAX = 128, + + TIMERTICK = HZ / 10, + MINTIMER = HZ >> 2, + MAXTIMER = HZ << 1, + HELPWAIT = 20, }; struct buf { struct list_head bufs; - ulong start_time; /* for disk stats */ + ulong stime; /* for disk stats */ ulong flags; ulong nframesout; - char *bufaddr; ulong resid; ulong bv_resid; + ulong bv_off; sector_t sector; struct bio *bio; 
struct bio_vec *bv; @@ -114,19 +120,38 @@ struct frame { struct sk_buff *skb; }; +struct aoeif { + struct net_device *nd; + unsigned char lost; + unsigned char lostjumbo; + ushort maxbcnt; +}; + +struct aoetgt { + unsigned char addr[6]; + ushort nframes; + struct frame *frames; + struct aoeif ifs[NAOEIFS]; + struct aoeif *ifp; /* current aoeif in use */ + ushort nout; + ushort maxout; + u16 lasttag; /* last tag sent */ + u16 useme; + ulong lastwadj; /* last window adjustment */ + int wpkts, rpkts; + int dataref; +}; + struct aoedev { struct aoedev *next; - unsigned char addr[6]; /* remote mac addr */ - ushort flags; ulong sysminor; ulong aoemajor; - ulong aoeminor; + u16 aoeminor; + u16 flags; u16 nopen; /* (bd_openers isn't available without sleeping) */ - u16 lasttag; /* last tag sent */ u16 rttavg; /* round trip average of requests/responses */ u16 mintimer; u16 fw_ver; /* version of blade's firmware */ - u16 maxbcnt; struct work_struct work;/* disk create work struct */ struct gendisk *gd; struct request_queue blkq; @@ -134,15 +159,17 @@ struct aoedev { sector_t ssize; struct timer_list timer; spinlock_t lock; - struct net_device *ifp; /* interface ed is attached to */ struct sk_buff *sendq_hd; /* packets needing to be sent, list head */ struct sk_buff *sendq_tl; + struct sk_buff *skbpool_hd; + struct sk_buff *skbpool_tl; + int nskbpool; mempool_t *bufpool; /* for deadlock-free Buf allocation */ struct list_head bufq; /* queue of bios to work on */ struct buf *inprocess; /* the one we're currently working on */ - ushort lostjumbo; - ushort nframes; /* number of frames below */ - struct frame *frames; + struct aoetgt *targets[NTARGETS]; + struct aoetgt **tgt; /* target in use when working */ + struct aoetgt **htgt; /* target needing rexmit assistance */ }; @@ -160,14 +187,16 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); void aoecmd_ata_rsp(struct sk_buff *); void aoecmd_cfg_rsp(struct sk_buff *); void aoecmd_sleepwork(struct work_struct *); 
-struct sk_buff *new_skb(ulong); +void aoecmd_cleanslate(struct aoedev *); +struct sk_buff *aoecmd_ata_id(struct aoedev *); int aoedev_init(void); void aoedev_exit(void); struct aoedev *aoedev_by_aoeaddr(int maj, int min); -struct aoedev *aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt); +struct aoedev *aoedev_by_sysminor_m(ulong sysminor); void aoedev_downdev(struct aoedev *d); int aoedev_isbusy(struct aoedev *d); +int aoedev_flush(const char __user *str, size_t size); int aoenet_init(void); void aoenet_exit(void); @@ -175,4 +204,4 @@ void aoenet_xmit(struct sk_buff *); int is_aoe_netif(struct net_device *ifp); int set_aoe_iflist(const char __user *str, size_t size); -u64 mac_addr(char addr[6]); +unsigned long long mac_addr(char addr[6]); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 826d12381e2..0c39782b266 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ /* * aoeblk.c * block device routines @@ -24,7 +24,7 @@ static ssize_t aoedisk_show_state(struct device *dev, return snprintf(page, PAGE_SIZE, "%s%s\n", (d->flags & DEVFL_UP) ? "up" : "down", - (d->flags & DEVFL_PAUSE) ? ",paused" : + (d->flags & DEVFL_KICKME) ? ",kickme" : (d->nopen && !(d->flags & DEVFL_UP)) ? 
",closewait" : ""); /* I'd rather see nopen exported so we can ditch closewait */ } @@ -33,17 +33,48 @@ static ssize_t aoedisk_show_mac(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); struct aoedev *d = disk->private_data; + struct aoetgt *t = d->targets[0]; - return snprintf(page, PAGE_SIZE, "%012llx\n", - (unsigned long long)mac_addr(d->addr)); + if (t == NULL) + return snprintf(page, PAGE_SIZE, "none\n"); + return snprintf(page, PAGE_SIZE, "%012llx\n", mac_addr(t->addr)); } static ssize_t aoedisk_show_netif(struct device *dev, struct device_attribute *attr, char *page) { struct gendisk *disk = dev_to_disk(dev); struct aoedev *d = disk->private_data; + struct net_device *nds[8], **nd, **nnd, **ne; + struct aoetgt **t, **te; + struct aoeif *ifp, *e; + char *p; + + memset(nds, 0, sizeof nds); + nd = nds; + ne = nd + ARRAY_SIZE(nds); + t = d->targets; + te = t + NTARGETS; + for (; t < te && *t; t++) { + ifp = (*t)->ifs; + e = ifp + NAOEIFS; + for (; ifp < e && ifp->nd; ifp++) { + for (nnd = nds; nnd < nd; nnd++) + if (*nnd == ifp->nd) + break; + if (nnd == nd && nd != ne) + *nd++ = ifp->nd; + } + } - return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name); + ne = nd; + nd = nds; + if (*nd == NULL) + return snprintf(page, PAGE_SIZE, "none\n"); + for (p = page; nd < ne; nd++) + p += snprintf(p, PAGE_SIZE - (p-page), "%s%s", + p == page ? 
"" : ",", (*nd)->name); + p += snprintf(p, PAGE_SIZE - (p-page), "\n"); + return p-page; } /* firmware version */ static ssize_t aoedisk_show_fwver(struct device *dev, @@ -134,7 +165,23 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); + if (bio == NULL) { + printk(KERN_ERR "aoe: bio is NULL\n"); + BUG(); + return 0; + } d = bio->bi_bdev->bd_disk->private_data; + if (d == NULL) { + printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n"); + BUG(); + bio_endio(bio, -ENXIO); + return 0; + } else if (bio->bi_io_vec == NULL) { + printk(KERN_ERR "aoe: bi_io_vec is NULL\n"); + BUG(); + bio_endio(bio, -ENXIO); + return 0; + } buf = mempool_alloc(d->bufpool, GFP_NOIO); if (buf == NULL) { printk(KERN_INFO "aoe: buf allocation failure\n"); @@ -143,19 +190,19 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) } memset(buf, 0, sizeof(*buf)); INIT_LIST_HEAD(&buf->bufs); - buf->start_time = jiffies; + buf->stime = jiffies; buf->bio = bio; buf->resid = bio->bi_size; buf->sector = bio->bi_sector; buf->bv = &bio->bi_io_vec[bio->bi_idx]; - WARN_ON(buf->bv->bv_len == 0); buf->bv_resid = buf->bv->bv_len; - buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; + WARN_ON(buf->bv_resid == 0); + buf->bv_off = buf->bv->bv_offset; spin_lock_irqsave(&d->lock, flags); if ((d->flags & DEVFL_UP) == 0) { - printk(KERN_INFO "aoe: device %ld.%ld is not up\n", + printk(KERN_INFO "aoe: device %ld.%d is not up\n", d->aoemajor, d->aoeminor); spin_unlock_irqrestore(&d->lock, flags); mempool_free(buf, d->bufpool); @@ -208,14 +255,15 @@ aoeblk_gdalloc(void *vp) gd = alloc_disk(AOE_PARTITIONS); if (gd == NULL) { - printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%ld\n", + printk(KERN_ERR + "aoe: cannot allocate disk structure for %ld.%d\n", d->aoemajor, d->aoeminor); goto err; } d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache); if (d->bufpool == NULL) { - printk(KERN_ERR "aoe: cannot allocate bufpool for 
%ld.%ld\n", + printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n", d->aoemajor, d->aoeminor); goto err_disk; } @@ -229,7 +277,7 @@ aoeblk_gdalloc(void *vp) gd->fops = &aoe_bdops; gd->private_data = d; gd->capacity = d->ssize; - snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld", + snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); gd->queue = &d->blkq; diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index d5480e34cb2..e8e60e7a2e7 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ /* * aoechr.c * AoE character device driver @@ -6,6 +6,7 @@ #include <linux/hdreg.h> #include <linux/blkdev.h> +#include <linux/delay.h> #include "aoe.h" enum { @@ -14,6 +15,7 @@ enum { MINOR_DISCOVER, MINOR_INTERFACES, MINOR_REVALIDATE, + MINOR_FLUSH, MSGSZ = 2048, NMSG = 100, /* message backlog to retain */ }; @@ -42,6 +44,7 @@ static struct aoe_chardev chardevs[] = { { MINOR_DISCOVER, "discover" }, { MINOR_INTERFACES, "interfaces" }, { MINOR_REVALIDATE, "revalidate" }, + { MINOR_FLUSH, "flush" }, }; static int @@ -68,6 +71,7 @@ revalidate(const char __user *str, size_t size) int major, minor, n; ulong flags; struct aoedev *d; + struct sk_buff *skb; char buf[16]; if (size >= sizeof buf) @@ -85,13 +89,20 @@ revalidate(const char __user *str, size_t size) d = aoedev_by_aoeaddr(major, minor); if (!d) return -EINVAL; - spin_lock_irqsave(&d->lock, flags); - d->flags &= ~DEVFL_MAXBCNT; - d->flags |= DEVFL_PAUSE; + aoecmd_cleanslate(d); +loop: + skb = aoecmd_ata_id(d); spin_unlock_irqrestore(&d->lock, flags); + /* try again if we are able to sleep a bit, + * otherwise give up this revalidation + */ + if (!skb && !msleep_interruptible(200)) { + spin_lock_irqsave(&d->lock, flags); + goto loop; + } + aoenet_xmit(skb); aoecmd_cfg(major, minor); - 
return 0; } @@ -149,6 +160,9 @@ aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp break; case MINOR_REVALIDATE: ret = revalidate(buf, cnt); + break; + case MINOR_FLUSH: + ret = aoedev_flush(buf, cnt); } if (ret == 0) ret = cnt; @@ -185,52 +199,51 @@ aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off) ulong flags; n = (unsigned long) filp->private_data; - switch (n) { - case MINOR_ERR: - spin_lock_irqsave(&emsgs_lock, flags); -loop: - em = emsgs + emsgs_head_idx; - if ((em->flags & EMFL_VALID) == 0) { - if (filp->f_flags & O_NDELAY) { - spin_unlock_irqrestore(&emsgs_lock, flags); - return -EAGAIN; - } - nblocked_emsgs_readers++; + if (n != MINOR_ERR) + return -EFAULT; + + spin_lock_irqsave(&emsgs_lock, flags); + for (;;) { + em = emsgs + emsgs_head_idx; + if ((em->flags & EMFL_VALID) != 0) + break; + if (filp->f_flags & O_NDELAY) { spin_unlock_irqrestore(&emsgs_lock, flags); + return -EAGAIN; + } + nblocked_emsgs_readers++; - n = down_interruptible(&emsgs_sema); + spin_unlock_irqrestore(&emsgs_lock, flags); + + n = down_interruptible(&emsgs_sema); - spin_lock_irqsave(&emsgs_lock, flags); + spin_lock_irqsave(&emsgs_lock, flags); - nblocked_emsgs_readers--; + nblocked_emsgs_readers--; - if (n) { - spin_unlock_irqrestore(&emsgs_lock, flags); - return -ERESTARTSYS; - } - goto loop; - } - if (em->len > cnt) { + if (n) { spin_unlock_irqrestore(&emsgs_lock, flags); - return -EAGAIN; + return -ERESTARTSYS; } - mp = em->msg; - len = em->len; - em->msg = NULL; - em->flags &= ~EMFL_VALID; + } + if (em->len > cnt) { + spin_unlock_irqrestore(&emsgs_lock, flags); + return -EAGAIN; + } + mp = em->msg; + len = em->len; + em->msg = NULL; + em->flags &= ~EMFL_VALID; - emsgs_head_idx++; - emsgs_head_idx %= ARRAY_SIZE(emsgs); + emsgs_head_idx++; + emsgs_head_idx %= ARRAY_SIZE(emsgs); - spin_unlock_irqrestore(&emsgs_lock, flags); + spin_unlock_irqrestore(&emsgs_lock, flags); - n = copy_to_user(buf, mp, len); - kfree(mp); - return n == 
0 ? len : -EFAULT; - default: - return -EFAULT; - } + n = copy_to_user(buf, mp, len); + kfree(mp); + return n == 0 ? len : -EFAULT; } static const struct file_operations aoe_fops = { diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 4d59d505773..d00293ba3b4 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ /* * aoecmd.c * Filesystem request handling methods @@ -9,19 +9,21 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/genhd.h> +#include <linux/moduleparam.h> #include <net/net_namespace.h> #include <asm/unaligned.h> #include "aoe.h" -#define TIMERTICK (HZ / 10) -#define MINTIMER (2 * TIMERTICK) -#define MAXTIMER (HZ << 1) - static int aoe_deadsecs = 60 * 3; module_param(aoe_deadsecs, int, 0644); MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); -struct sk_buff * +static int aoe_maxout = 16; +module_param(aoe_maxout, int, 0644); +MODULE_PARM_DESC(aoe_maxout, + "Only aoe_maxout outstanding packets for every MAC on eX.Y."); + +static struct sk_buff * new_skb(ulong len) { struct sk_buff *skb; @@ -43,12 +45,12 @@ new_skb(ulong len) } static struct frame * -getframe(struct aoedev *d, int tag) +getframe(struct aoetgt *t, int tag) { struct frame *f, *e; - f = d->frames; - e = f + d->nframes; + f = t->frames; + e = f + t->nframes; for (; f<e; f++) if (f->tag == tag) return f; @@ -61,21 +63,21 @@ getframe(struct aoedev *d, int tag) * This driver reserves tag -1 to mean "unused frame." 
*/ static int -newtag(struct aoedev *d) +newtag(struct aoetgt *t) { register ulong n; n = jiffies & 0xffff; - return n |= (++d->lasttag & 0x7fff) << 16; + return n |= (++t->lasttag & 0x7fff) << 16; } static int -aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) +aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h) { - u32 host_tag = newtag(d); + u32 host_tag = newtag(t); - memcpy(h->src, d->ifp->dev_addr, sizeof h->src); - memcpy(h->dst, d->addr, sizeof h->dst); + memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); + memcpy(h->dst, t->addr, sizeof h->dst); h->type = __constant_cpu_to_be16(ETH_P_AOE); h->verfl = AOE_HVER; h->major = cpu_to_be16(d->aoemajor); @@ -98,42 +100,162 @@ put_lba(struct aoe_atahdr *ah, sector_t lba) } static void -aoecmd_ata_rw(struct aoedev *d, struct frame *f) +ifrotate(struct aoetgt *t) +{ + t->ifp++; + if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL) + t->ifp = t->ifs; + if (t->ifp->nd == NULL) { + printk(KERN_INFO "aoe: no interface to rotate to\n"); + BUG(); + } +} + +static void +skb_pool_put(struct aoedev *d, struct sk_buff *skb) +{ + if (!d->skbpool_hd) + d->skbpool_hd = skb; + else + d->skbpool_tl->next = skb; + d->skbpool_tl = skb; +} + +static struct sk_buff * +skb_pool_get(struct aoedev *d) +{ + struct sk_buff *skb; + + skb = d->skbpool_hd; + if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) { + d->skbpool_hd = skb->next; + skb->next = NULL; + return skb; + } + if (d->nskbpool < NSKBPOOLMAX + && (skb = new_skb(ETH_ZLEN))) { + d->nskbpool++; + return skb; + } + return NULL; +} + +/* freeframe is where we do our load balancing so it's a little hairy. 
*/ +static struct frame * +freeframe(struct aoedev *d) +{ + struct frame *f, *e, *rf; + struct aoetgt **t; + struct sk_buff *skb; + + if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ + printk(KERN_ERR "aoe: NULL TARGETS!\n"); + return NULL; + } + t = d->tgt; + t++; + if (t >= &d->targets[NTARGETS] || !*t) + t = d->targets; + for (;;) { + if ((*t)->nout < (*t)->maxout + && t != d->htgt + && (*t)->ifp->nd) { + rf = NULL; + f = (*t)->frames; + e = f + (*t)->nframes; + for (; f < e; f++) { + if (f->tag != FREETAG) + continue; + skb = f->skb; + if (!skb + && !(f->skb = skb = new_skb(ETH_ZLEN))) + continue; + if (atomic_read(&skb_shinfo(skb)->dataref) + != 1) { + if (!rf) + rf = f; + continue; + } +gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0; + skb_trim(skb, 0); + d->tgt = t; + ifrotate(*t); + return f; + } + /* Work can be done, but the network layer is + holding our precious packets. Try to grab + one from the pool. */ + f = rf; + if (f == NULL) { /* more paranoia */ + printk(KERN_ERR + "aoe: freeframe: %s.\n", + "unexpected null rf"); + d->flags |= DEVFL_KICKME; + return NULL; + } + skb = skb_pool_get(d); + if (skb) { + skb_pool_put(d, f->skb); + f->skb = skb; + goto gotone; + } + (*t)->dataref++; + if ((*t)->nout == 0) + d->flags |= DEVFL_KICKME; + } + if (t == d->tgt) /* we've looped and found nada */ + break; + t++; + if (t >= &d->targets[NTARGETS] || !*t) + t = d->targets; + } + return NULL; +} + +static int +aoecmd_ata_rw(struct aoedev *d) { + struct frame *f; struct aoe_hdr *h; struct aoe_atahdr *ah; struct buf *buf; + struct bio_vec *bv; + struct aoetgt *t; struct sk_buff *skb; ulong bcnt; - register sector_t sector; char writebit, extbit; writebit = 0x10; extbit = 0x4; + f = freeframe(d); + if (f == NULL) + return 0; + t = *d->tgt; buf = d->inprocess; - - sector = buf->sector; - bcnt = buf->bv_resid; - if (bcnt > d->maxbcnt) - bcnt = d->maxbcnt; - + bv = buf->bv; + bcnt = t->ifp->maxbcnt; + if (bcnt == 0) + bcnt = DEFAULTBCNT; + 
if (bcnt > buf->bv_resid) + bcnt = buf->bv_resid; /* initialize the headers & frame */ skb = f->skb; h = (struct aoe_hdr *) skb_mac_header(skb); ah = (struct aoe_atahdr *) (h+1); skb_put(skb, sizeof *h + sizeof *ah); memset(h, 0, skb->len); - f->tag = aoehdr_atainit(d, h); + f->tag = aoehdr_atainit(d, t, h); + t->nout++; f->waited = 0; f->buf = buf; - f->bufaddr = buf->bufaddr; + f->bufaddr = page_address(bv->bv_page) + buf->bv_off; f->bcnt = bcnt; - f->lba = sector; + f->lba = buf->sector; /* set up ata header */ ah->scnt = bcnt >> 9; - put_lba(ah, sector); + put_lba(ah, buf->sector); if (d->flags & DEVFL_EXT) { ah->aflags |= AOEAFL_EXT; } else { @@ -141,14 +263,14 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f) ah->lba3 &= 0x0f; ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ } - if (bio_data_dir(buf->bio) == WRITE) { - skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), - offset_in_page(f->bufaddr), bcnt); + skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt); ah->aflags |= AOEAFL_WRITE; skb->len += bcnt; skb->data_len = bcnt; + t->wpkts++; } else { + t->rpkts++; writebit = 0; } @@ -156,29 +278,29 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f) /* mark all tracking fields and load out */ buf->nframesout += 1; - buf->bufaddr += bcnt; + buf->bv_off += bcnt; buf->bv_resid -= bcnt; -/* printk(KERN_DEBUG "aoe: bv_resid=%ld\n", buf->bv_resid); */ buf->resid -= bcnt; buf->sector += bcnt >> 9; if (buf->resid == 0) { d->inprocess = NULL; } else if (buf->bv_resid == 0) { - buf->bv++; - WARN_ON(buf->bv->bv_len == 0); - buf->bv_resid = buf->bv->bv_len; - buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; + buf->bv = ++bv; + buf->bv_resid = bv->bv_len; + WARN_ON(buf->bv_resid == 0); + buf->bv_off = bv->bv_offset; } - skb->dev = d->ifp; + skb->dev = t->ifp->nd; skb = skb_clone(skb, GFP_ATOMIC); - if (skb == NULL) - return; - if (d->sendq_hd) - d->sendq_tl->next = skb; - else - d->sendq_hd = skb; - d->sendq_tl = skb; + if (skb) { + if 
(d->sendq_hd) + d->sendq_tl->next = skb; + else + d->sendq_hd = skb; + d->sendq_tl = skb; + } + return 1; } /* some callers cannot sleep, and they can call this function, @@ -232,62 +354,8 @@ cont: return sl; } -static struct frame * -freeframe(struct aoedev *d) -{ - struct frame *f, *e; - int n = 0; - - f = d->frames; - e = f + d->nframes; - for (; f<e; f++) { - if (f->tag != FREETAG) - continue; - if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) { - skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0; - skb_trim(f->skb, 0); - return f; - } - n++; - } - if (n == d->nframes) /* wait for network layer */ - d->flags |= DEVFL_KICKME; - - return NULL; -} - -/* enters with d->lock held */ -void -aoecmd_work(struct aoedev *d) -{ - struct frame *f; - struct buf *buf; - - if (d->flags & DEVFL_PAUSE) { - if (!aoedev_isbusy(d)) - d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor, - d->aoeminor, &d->sendq_tl); - return; - } - -loop: - f = freeframe(d); - if (f == NULL) - return; - if (d->inprocess == NULL) { - if (list_empty(&d->bufq)) - return; - buf = container_of(d->bufq.next, struct buf, bufs); - list_del(d->bufq.next); -/*printk(KERN_DEBUG "aoe: bi_size=%ld\n", buf->bio->bi_size); */ - d->inprocess = buf; - } - aoecmd_ata_rw(d, f); - goto loop; -} - static void -rexmit(struct aoedev *d, struct frame *f) +resend(struct aoedev *d, struct aoetgt *t, struct frame *f) { struct sk_buff *skb; struct aoe_hdr *h; @@ -295,41 +363,46 @@ rexmit(struct aoedev *d, struct frame *f) char buf[128]; u32 n; - n = newtag(d); + ifrotate(t); + n = newtag(t); + skb = f->skb; + h = (struct aoe_hdr *) skb_mac_header(skb); + ah = (struct aoe_atahdr *) (h+1); snprintf(buf, sizeof buf, - "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n", - "retransmit", - d->aoemajor, d->aoeminor, f->tag, jiffies, n); + "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x " + "s=%012llx d=%012llx nout=%d\n", + "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n, + mac_addr(h->src), + mac_addr(h->dst), t->nout); 
aoechr_error(buf); - skb = f->skb; - h = (struct aoe_hdr *) skb_mac_header(skb); - ah = (struct aoe_atahdr *) (h+1); f->tag = n; h->tag = cpu_to_be32(n); - memcpy(h->dst, d->addr, sizeof h->dst); - memcpy(h->src, d->ifp->dev_addr, sizeof h->src); - - n = DEFAULTBCNT / 512; - if (ah->scnt > n) { - ah->scnt = n; + memcpy(h->dst, t->addr, sizeof h->dst); + memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); + + switch (ah->cmdstat) { + default: + break; + case WIN_READ: + case WIN_READ_EXT: + case WIN_WRITE: + case WIN_WRITE_EXT: + put_lba(ah, f->lba); + + n = f->bcnt; + if (n > DEFAULTBCNT) + n = DEFAULTBCNT; + ah->scnt = n >> 9; if (ah->aflags & AOEAFL_WRITE) { skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), - offset_in_page(f->bufaddr), DEFAULTBCNT); - skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT; - skb->data_len = DEFAULTBCNT; - } - if (++d->lostjumbo > (d->nframes << 1)) - if (d->maxbcnt != DEFAULTBCNT) { - printk(KERN_INFO "aoe: e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n", - d->aoemajor, d->aoeminor, d->ifp->name); - d->maxbcnt = DEFAULTBCNT; - d->flags |= DEVFL_MAXBCNT; + offset_in_page(f->bufaddr), n); + skb->len = sizeof *h + sizeof *ah + n; + skb->data_len = n; } } - - skb->dev = d->ifp; + skb->dev = t->ifp->nd; skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return; @@ -352,10 +425,92 @@ tsince(int tag) return n; } +static struct aoeif * +getif(struct aoetgt *t, struct net_device *nd) +{ + struct aoeif *p, *e; + + p = t->ifs; + e = p + NAOEIFS; + for (; p < e; p++) + if (p->nd == nd) + return p; + return NULL; +} + +static struct aoeif * +addif(struct aoetgt *t, struct net_device *nd) +{ + struct aoeif *p; + + p = getif(t, NULL); + if (!p) + return NULL; + p->nd = nd; + p->maxbcnt = DEFAULTBCNT; + p->lost = 0; + p->lostjumbo = 0; + return p; +} + +static void +ejectif(struct aoetgt *t, struct aoeif *ifp) +{ + struct aoeif *e; + ulong n; + + e = t->ifs + NAOEIFS - 1; + n = (e - ifp) * sizeof *ifp; + memmove(ifp, ifp+1, n); + e->nd 
= NULL; +} + +static int +sthtith(struct aoedev *d) +{ + struct frame *f, *e, *nf; + struct sk_buff *skb; + struct aoetgt *ht = *d->htgt; + + f = ht->frames; + e = f + ht->nframes; + for (; f < e; f++) { + if (f->tag == FREETAG) + continue; + nf = freeframe(d); + if (!nf) + return 0; + skb = nf->skb; + *nf = *f; + f->skb = skb; + f->tag = FREETAG; + nf->waited = 0; + ht->nout--; + (*d->tgt)->nout++; + resend(d, *d->tgt, nf); + } + /* he's clean, he's useless. take away his interfaces */ + memset(ht->ifs, 0, sizeof ht->ifs); + d->htgt = NULL; + return 1; +} + +static inline unsigned char +ata_scnt(unsigned char *packet) { + struct aoe_hdr *h; + struct aoe_atahdr *ah; + + h = (struct aoe_hdr *) packet; + ah = (struct aoe_atahdr *) (h+1); + return ah->scnt; +} + static void rexmit_timer(ulong vp) { struct aoedev *d; + struct aoetgt *t, **tt, **te; + struct aoeif *ifp; struct frame *f, *e; struct sk_buff *sl; register long timeout; @@ -374,31 +529,79 @@ rexmit_timer(ulong vp) spin_unlock_irqrestore(&d->lock, flags); return; } - f = d->frames; - e = f + d->nframes; - for (; f<e; f++) { - if (f->tag != FREETAG && tsince(f->tag) >= timeout) { + tt = d->targets; + te = tt + NTARGETS; + for (; tt < te && *tt; tt++) { + t = *tt; + f = t->frames; + e = f + t->nframes; + for (; f < e; f++) { + if (f->tag == FREETAG + || tsince(f->tag) < timeout) + continue; n = f->waited += timeout; n /= HZ; - if (n > aoe_deadsecs) { /* waited too long for response */ + if (n > aoe_deadsecs) { + /* waited too long. device failure. 
*/ aoedev_downdev(d); break; } - rexmit(d, f); + + if (n > HELPWAIT /* see if another target can help */ + && (tt != d->targets || d->targets[1])) + d->htgt = tt; + + if (t->nout == t->maxout) { + if (t->maxout > 1) + t->maxout--; + t->lastwadj = jiffies; + } + + ifp = getif(t, f->skb->dev); + if (ifp && ++ifp->lost > (t->nframes << 1) + && (ifp != t->ifs || t->ifs[1].nd)) { + ejectif(t, ifp); + ifp = NULL; + } + + if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512 + && ifp && ++ifp->lostjumbo > (t->nframes << 1) + && ifp->maxbcnt != DEFAULTBCNT) { + printk(KERN_INFO + "aoe: e%ld.%d: " + "too many lost jumbo on " + "%s:%012llx - " + "falling back to %d frames.\n", + d->aoemajor, d->aoeminor, + ifp->nd->name, mac_addr(t->addr), + DEFAULTBCNT); + ifp->maxbcnt = 0; + } + resend(d, t, f); + } + + /* window check */ + if (t->nout == t->maxout + && t->maxout < t->nframes + && (jiffies - t->lastwadj)/HZ > 10) { + t->maxout++; + t->lastwadj = jiffies; } } - if (d->flags & DEVFL_KICKME) { + + if (d->sendq_hd) { + n = d->rttavg <<= 1; + if (n > MAXTIMER) + d->rttavg = MAXTIMER; + } + + if (d->flags & DEVFL_KICKME || d->htgt) { d->flags &= ~DEVFL_KICKME; aoecmd_work(d); } sl = d->sendq_hd; d->sendq_hd = d->sendq_tl = NULL; - if (sl) { - n = d->rttavg <<= 1; - if (n > MAXTIMER) - d->rttavg = MAXTIMER; - } d->timer.expires = jiffies + TIMERTICK; add_timer(&d->timer); @@ -408,6 +611,25 @@ rexmit_timer(ulong vp) aoenet_xmit(sl); } +/* enters with d->lock held */ +void +aoecmd_work(struct aoedev *d) +{ + struct buf *buf; +loop: + if (d->htgt && !sthtith(d)) + return; + if (d->inprocess == NULL) { + if (list_empty(&d->bufq)) + return; + buf = container_of(d->bufq.next, struct buf, bufs); + list_del(d->bufq.next); + d->inprocess = buf; + } + if (aoecmd_ata_rw(d)) + goto loop; +} + /* this function performs work that has been deferred until sleeping is OK */ void @@ -440,7 +662,7 @@ aoecmd_sleepwork(struct work_struct *work) } static void -ataid_complete(struct aoedev *d, 
unsigned char *id) +ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) { u64 ssize; u16 n; @@ -475,24 +697,20 @@ ataid_complete(struct aoedev *d, unsigned char *id) } if (d->ssize != ssize) - printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n", - (unsigned long long)mac_addr(d->addr), + printk(KERN_INFO + "aoe: %012llx e%ld.%d v%04x has %llu sectors\n", + mac_addr(t->addr), d->aoemajor, d->aoeminor, d->fw_ver, (long long)ssize); d->ssize = ssize; d->geo.start = 0; + if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) + return; if (d->gd != NULL) { d->gd->capacity = ssize; d->flags |= DEVFL_NEWSIZE; - } else { - if (d->flags & DEVFL_GDALLOC) { - printk(KERN_ERR "aoe: can't schedule work for e%lu.%lu, %s\n", - d->aoemajor, d->aoeminor, - "it's already on! This shouldn't happen.\n"); - return; - } + } else d->flags |= DEVFL_GDALLOC; - } schedule_work(&d->work); } @@ -519,6 +737,31 @@ calc_rttavg(struct aoedev *d, int rtt) d->rttavg += n >> 2; } +static struct aoetgt * +gettgt(struct aoedev *d, char *addr) +{ + struct aoetgt **t, **e; + + t = d->targets; + e = t + NTARGETS; + for (; t < e && *t; t++) + if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0) + return *t; + return NULL; +} + +static inline void +diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector) +{ + unsigned long n_sect = bio->bi_size >> 9; + const int rw = bio_data_dir(bio); + + all_stat_inc(disk, ios[rw], sector); + all_stat_add(disk, ticks[rw], duration, sector); + all_stat_add(disk, sectors[rw], n_sect, sector); + all_stat_add(disk, io_ticks, duration, sector); +} + void aoecmd_ata_rsp(struct sk_buff *skb) { @@ -528,6 +771,8 @@ aoecmd_ata_rsp(struct sk_buff *skb) struct frame *f; struct buf *buf; struct sk_buff *sl; + struct aoetgt *t; + struct aoeif *ifp; register long n; ulong flags; char ebuf[128]; @@ -547,7 +792,14 @@ aoecmd_ata_rsp(struct sk_buff *skb) spin_lock_irqsave(&d->lock, flags); n = be32_to_cpu(get_unaligned(&hin->tag)); - f = 
getframe(d, n); + t = gettgt(d, hin->src); + if (t == NULL) { + printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n", + d->aoemajor, d->aoeminor, mac_addr(hin->src)); + spin_unlock_irqrestore(&d->lock, flags); + return; + } + f = getframe(t, n); if (f == NULL) { calc_rttavg(d, -tsince(n)); spin_unlock_irqrestore(&d->lock, flags); @@ -569,24 +821,24 @@ aoecmd_ata_rsp(struct sk_buff *skb) ahout = (struct aoe_atahdr *) (hout+1); buf = f->buf; - if (ahout->cmdstat == WIN_IDENTIFY) - d->flags &= ~DEVFL_PAUSE; if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ printk(KERN_ERR - "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n", + "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n", ahout->cmdstat, ahin->cmdstat, d->aoemajor, d->aoeminor); if (buf) buf->flags |= BUFFL_FAIL; } else { + if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */ + d->htgt = NULL; n = ahout->scnt << 9; switch (ahout->cmdstat) { case WIN_READ: case WIN_READ_EXT: if (skb->len - sizeof *hin - sizeof *ahin < n) { printk(KERN_ERR - "aoe: runt data size in read. skb->len=%d\n", - skb->len); + "aoe: %s. skb->len=%d need=%ld\n", + "runt data size in read", skb->len, n); /* fail frame f? just returning will rexmit. 
*/ spin_unlock_irqrestore(&d->lock, flags); return; @@ -594,32 +846,18 @@ aoecmd_ata_rsp(struct sk_buff *skb) memcpy(f->bufaddr, ahin+1, n); case WIN_WRITE: case WIN_WRITE_EXT: + ifp = getif(t, skb->dev); + if (ifp) { + ifp->lost = 0; + if (n > DEFAULTBCNT) + ifp->lostjumbo = 0; + } if (f->bcnt -= n) { - skb = f->skb; + f->lba += n >> 9; f->bufaddr += n; - put_lba(ahout, f->lba += ahout->scnt); - n = f->bcnt; - if (n > DEFAULTBCNT) - n = DEFAULTBCNT; - ahout->scnt = n >> 9; - if (ahout->aflags & AOEAFL_WRITE) { - skb_fill_page_desc(skb, 0, - virt_to_page(f->bufaddr), - offset_in_page(f->bufaddr), n); - skb->len = sizeof *hout + sizeof *ahout + n; - skb->data_len = n; - } - f->tag = newtag(d); - hout->tag = cpu_to_be32(f->tag); - skb->dev = d->ifp; - skb = skb_clone(skb, GFP_ATOMIC); - spin_unlock_irqrestore(&d->lock, flags); - if (skb) - aoenet_xmit(skb); - return; + resend(d, t, f); + goto xmit; } - if (n > DEFAULTBCNT) - d->lostjumbo = 0; break; case WIN_IDENTIFY: if (skb->len - sizeof *hin - sizeof *ahin < 512) { @@ -629,7 +867,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) spin_unlock_irqrestore(&d->lock, flags); return; } - ataid_complete(d, (char *) (ahin+1)); + ataid_complete(d, t, (char *) (ahin+1)); break; default: printk(KERN_INFO @@ -640,28 +878,19 @@ aoecmd_ata_rsp(struct sk_buff *skb) } } - if (buf) { - buf->nframesout -= 1; - if (buf->nframesout == 0 && buf->resid == 0) { - unsigned long duration = jiffies - buf->start_time; - unsigned long n_sect = buf->bio->bi_size >> 9; - struct gendisk *disk = d->gd; - const int rw = bio_data_dir(buf->bio); - - disk_stat_inc(disk, ios[rw]); - disk_stat_add(disk, ticks[rw], duration); - disk_stat_add(disk, sectors[rw], n_sect); - disk_stat_add(disk, io_ticks, duration); - n = (buf->flags & BUFFL_FAIL) ? 
-EIO : 0; - bio_endio(buf->bio, n); - mempool_free(buf, d->bufpool); - } + if (buf && --buf->nframesout == 0 && buf->resid == 0) { + diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector); + n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; + bio_endio(buf->bio, n); + mempool_free(buf, d->bufpool); } f->buf = NULL; f->tag = FREETAG; + t->nout--; aoecmd_work(d); +xmit: sl = d->sendq_hd; d->sendq_hd = d->sendq_tl = NULL; @@ -679,23 +908,20 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) aoenet_xmit(sl); } -/* - * Since we only call this in one place (and it only prepares one frame) - * we just return the skb. Usually we'd chain it up to the aoedev sendq. - */ -static struct sk_buff * +struct sk_buff * aoecmd_ata_id(struct aoedev *d) { struct aoe_hdr *h; struct aoe_atahdr *ah; struct frame *f; struct sk_buff *skb; + struct aoetgt *t; f = freeframe(d); - if (f == NULL) { - printk(KERN_ERR "aoe: can't get a frame. This shouldn't happen.\n"); + if (f == NULL) return NULL; - } + + t = *d->tgt; /* initialize the headers & frame */ skb = f->skb; @@ -703,7 +929,8 @@ aoecmd_ata_id(struct aoedev *d) ah = (struct aoe_atahdr *) (h+1); skb_put(skb, sizeof *h + sizeof *ah); memset(h, 0, skb->len); - f->tag = aoehdr_atainit(d, h); + f->tag = aoehdr_atainit(d, t, h); + t->nout++; f->waited = 0; /* set up ata header */ @@ -711,7 +938,7 @@ aoecmd_ata_id(struct aoedev *d) ah->cmdstat = WIN_IDENTIFY; ah->lba3 = 0xa0; - skb->dev = d->ifp; + skb->dev = t->ifp->nd; d->rttavg = MAXTIMER; d->timer.function = rexmit_timer; @@ -719,15 +946,52 @@ aoecmd_ata_id(struct aoedev *d) return skb_clone(skb, GFP_ATOMIC); } +static struct aoetgt * +addtgt(struct aoedev *d, char *addr, ulong nframes) +{ + struct aoetgt *t, **tt, **te; + struct frame *f, *e; + + tt = d->targets; + te = tt + NTARGETS; + for (; tt < te && *tt; tt++) + ; + + if (tt == te) { + printk(KERN_INFO + "aoe: device addtgt failure; too many targets\n"); + return NULL; + } + t = kcalloc(1, sizeof *t, GFP_ATOMIC); + f = 
kcalloc(nframes, sizeof *f, GFP_ATOMIC); + if (!t || !f) { + kfree(f); + kfree(t); + printk(KERN_INFO "aoe: cannot allocate memory to add target\n"); + return NULL; + } + + t->nframes = nframes; + t->frames = f; + e = f + nframes; + for (; f < e; f++) + f->tag = FREETAG; + memcpy(t->addr, addr, sizeof t->addr); + t->ifp = t->ifs; + t->maxout = t->nframes; + return *tt = t; +} + void aoecmd_cfg_rsp(struct sk_buff *skb) { struct aoedev *d; struct aoe_hdr *h; struct aoe_cfghdr *ch; + struct aoetgt *t; + struct aoeif *ifp; ulong flags, sysminor, aoemajor; struct sk_buff *sl; - enum { MAXFRAMES = 16 }; u16 n; h = (struct aoe_hdr *) skb_mac_header(skb); @@ -752,10 +1016,10 @@ aoecmd_cfg_rsp(struct sk_buff *skb) } n = be16_to_cpu(ch->bufcnt); - if (n > MAXFRAMES) /* keep it reasonable */ - n = MAXFRAMES; + if (n > aoe_maxout) /* keep it reasonable */ + n = aoe_maxout; - d = aoedev_by_sysminor_m(sysminor, n); + d = aoedev_by_sysminor_m(sysminor); if (d == NULL) { printk(KERN_INFO "aoe: device sysminor_m failure\n"); return; @@ -763,38 +1027,74 @@ aoecmd_cfg_rsp(struct sk_buff *skb) spin_lock_irqsave(&d->lock, flags); - /* permit device to migrate mac and network interface */ - d->ifp = skb->dev; - memcpy(d->addr, h->src, sizeof d->addr); - if (!(d->flags & DEVFL_MAXBCNT)) { - n = d->ifp->mtu; + t = gettgt(d, h->src); + if (!t) { + t = addtgt(d, h->src, n); + if (!t) { + spin_unlock_irqrestore(&d->lock, flags); + return; + } + } + ifp = getif(t, skb->dev); + if (!ifp) { + ifp = addif(t, skb->dev); + if (!ifp) { + printk(KERN_INFO + "aoe: device addif failure; " + "too many interfaces?\n"); + spin_unlock_irqrestore(&d->lock, flags); + return; + } + } + if (ifp->maxbcnt) { + n = ifp->nd->mtu; n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr); n /= 512; if (n > ch->scnt) n = ch->scnt; n = n ? 
n * 512 : DEFAULTBCNT; - if (n != d->maxbcnt) { + if (n != ifp->maxbcnt) { printk(KERN_INFO - "aoe: e%ld.%ld: setting %d byte data frames on %s\n", - d->aoemajor, d->aoeminor, n, d->ifp->name); - d->maxbcnt = n; + "aoe: e%ld.%d: setting %d%s%s:%012llx\n", + d->aoemajor, d->aoeminor, n, + " byte data frames on ", ifp->nd->name, + mac_addr(t->addr)); + ifp->maxbcnt = n; } } /* don't change users' perspective */ - if (d->nopen && !(d->flags & DEVFL_PAUSE)) { + if (d->nopen) { spin_unlock_irqrestore(&d->lock, flags); return; } - d->flags |= DEVFL_PAUSE; /* force pause */ - d->mintimer = MINTIMER; d->fw_ver = be16_to_cpu(ch->fwver); - /* check for already outstanding ataid */ - sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL; + sl = aoecmd_ata_id(d); spin_unlock_irqrestore(&d->lock, flags); aoenet_xmit(sl); } +void +aoecmd_cleanslate(struct aoedev *d) +{ + struct aoetgt **t, **te; + struct aoeif *p, *e; + + d->mintimer = MINTIMER; + + t = d->targets; + te = t + NTARGETS; + for (; t < te && *t; t++) { + (*t)->maxout = (*t)->nframes; + p = (*t)->ifs; + e = p + NAOEIFS; + for (; p < e; p++) { + p->lostjumbo = 0; + p->lost = 0; + p->maxbcnt = DEFAULTBCNT; + } + } +} diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 51f50710e5f..f9a1cd9edb7 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ /* * aoedev.c * AoE device utility functions; maintains device list. 
@@ -7,23 +7,32 @@ #include <linux/hdreg.h> #include <linux/blkdev.h> #include <linux/netdevice.h> +#include <linux/delay.h> #include "aoe.h" +static void dummy_timer(ulong); +static void aoedev_freedev(struct aoedev *); +static void freetgt(struct aoedev *d, struct aoetgt *t); +static void skbpoolfree(struct aoedev *d); + static struct aoedev *devlist; -static spinlock_t devlist_lock; +static DEFINE_SPINLOCK(devlist_lock); int aoedev_isbusy(struct aoedev *d) { + struct aoetgt **t, **te; struct frame *f, *e; - f = d->frames; - e = f + d->nframes; - do { - if (f->tag != FREETAG) - return 1; - } while (++f < e); - + t = d->targets; + te = t + NTARGETS; + for (; t < te && *t; t++) { + f = (*t)->frames; + e = f + (*t)->nframes; + for (; f < e; f++) + if (f->tag != FREETAG) + return 1; + } return 0; } @@ -55,75 +64,41 @@ dummy_timer(ulong vp) add_timer(&d->timer); } -/* called with devlist lock held */ -static struct aoedev * -aoedev_newdev(ulong nframes) -{ - struct aoedev *d; - struct frame *f, *e; - - d = kzalloc(sizeof *d, GFP_ATOMIC); - f = kcalloc(nframes, sizeof *f, GFP_ATOMIC); - switch (!d || !f) { - case 0: - d->nframes = nframes; - d->frames = f; - e = f + nframes; - for (; f<e; f++) { - f->tag = FREETAG; - f->skb = new_skb(ETH_ZLEN); - if (!f->skb) - break; - } - if (f == e) - break; - while (f > d->frames) { - f--; - dev_kfree_skb(f->skb); - } - default: - if (f) - kfree(f); - if (d) - kfree(d); - return NULL; - } - INIT_WORK(&d->work, aoecmd_sleepwork); - spin_lock_init(&d->lock); - init_timer(&d->timer); - d->timer.data = (ulong) d; - d->timer.function = dummy_timer; - d->timer.expires = jiffies + HZ; - add_timer(&d->timer); - d->bufpool = NULL; /* defer to aoeblk_gdalloc */ - INIT_LIST_HEAD(&d->bufq); - d->next = devlist; - devlist = d; - - return d; -} - void aoedev_downdev(struct aoedev *d) { + struct aoetgt **t, **te; struct frame *f, *e; struct buf *buf; struct bio *bio; - f = d->frames; - e = f + d->nframes; - for (; f<e; f->tag = FREETAG, f->buf = 
NULL, f++) { - if (f->tag == FREETAG || f->buf == NULL) - continue; - buf = f->buf; - bio = buf->bio; - if (--buf->nframesout == 0) { - mempool_free(buf, d->bufpool); - bio_endio(bio, -EIO); + t = d->targets; + te = t + NTARGETS; + for (; t < te && *t; t++) { + f = (*t)->frames; + e = f + (*t)->nframes; + for (; f < e; f->tag = FREETAG, f->buf = NULL, f++) { + if (f->tag == FREETAG || f->buf == NULL) + continue; + buf = f->buf; + bio = buf->bio; + if (--buf->nframesout == 0 + && buf != d->inprocess) { + mempool_free(buf, d->bufpool); + bio_endio(bio, -EIO); + } } - skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0; + (*t)->maxout = (*t)->nframes; + (*t)->nout = 0; + } + buf = d->inprocess; + if (buf) { + bio = buf->bio; + mempool_free(buf, d->bufpool); + bio_endio(bio, -EIO); } d->inprocess = NULL; + d->htgt = NULL; while (!list_empty(&d->bufq)) { buf = container_of(d->bufq.next, struct buf, bufs); @@ -136,12 +111,114 @@ aoedev_downdev(struct aoedev *d) if (d->gd) d->gd->capacity = 0; - d->flags &= ~(DEVFL_UP | DEVFL_PAUSE); + d->flags &= ~DEVFL_UP; +} + +static void +aoedev_freedev(struct aoedev *d) +{ + struct aoetgt **t, **e; + + if (d->gd) { + aoedisk_rm_sysfs(d); + del_gendisk(d->gd); + put_disk(d->gd); + } + t = d->targets; + e = t + NTARGETS; + for (; t < e && *t; t++) + freetgt(d, *t); + if (d->bufpool) + mempool_destroy(d->bufpool); + skbpoolfree(d); + kfree(d); +} + +int +aoedev_flush(const char __user *str, size_t cnt) +{ + ulong flags; + struct aoedev *d, **dd; + struct aoedev *rmd = NULL; + char buf[16]; + int all = 0; + + if (cnt >= 3) { + if (cnt > sizeof buf) + cnt = sizeof buf; + if (copy_from_user(buf, str, cnt)) + return -EFAULT; + all = !strncmp(buf, "all", 3); + } + + flush_scheduled_work(); + spin_lock_irqsave(&devlist_lock, flags); + dd = &devlist; + while ((d = *dd)) { + spin_lock(&d->lock); + if ((!all && (d->flags & DEVFL_UP)) + || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) + || d->nopen) { + spin_unlock(&d->lock); + dd = &d->next; + 
continue; + } + *dd = d->next; + aoedev_downdev(d); + d->flags |= DEVFL_TKILL; + spin_unlock(&d->lock); + d->next = rmd; + rmd = d; + } + spin_unlock_irqrestore(&devlist_lock, flags); + while ((d = rmd)) { + rmd = d->next; + del_timer_sync(&d->timer); + aoedev_freedev(d); /* must be able to sleep */ + } + return 0; +} + +/* I'm not really sure that this is a realistic problem, but if the +network driver goes gonzo let's just leak memory after complaining. */ +static void +skbfree(struct sk_buff *skb) +{ + enum { Sms = 100, Tms = 3*1000}; + int i = Tms / Sms; + + if (skb == NULL) + return; + while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0) + msleep(Sms); + if (i < 0) { + printk(KERN_ERR + "aoe: %s holds ref: %s\n", + skb->dev ? skb->dev->name : "netif", + "cannot free skb -- memory leaked."); + return; + } + skb_shinfo(skb)->nr_frags = skb->data_len = 0; + skb_trim(skb, 0); + dev_kfree_skb(skb); +} + +static void +skbpoolfree(struct aoedev *d) +{ + struct sk_buff *skb; + + while ((skb = d->skbpool_hd)) { + d->skbpool_hd = skb->next; + skb->next = NULL; + skbfree(skb); + } + d->skbpool_tl = NULL; } /* find it or malloc it */ struct aoedev * -aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt) +aoedev_by_sysminor_m(ulong sysminor) { struct aoedev *d; ulong flags; @@ -151,43 +228,43 @@ aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt) for (d=devlist; d; d=d->next) if (d->sysminor == sysminor) break; - - if (d == NULL) { - d = aoedev_newdev(bufcnt); - if (d == NULL) { - spin_unlock_irqrestore(&devlist_lock, flags); - printk(KERN_INFO "aoe: aoedev_newdev failure.\n"); - return NULL; - } - d->sysminor = sysminor; - d->aoemajor = AOEMAJOR(sysminor); - d->aoeminor = AOEMINOR(sysminor); - } - + if (d) + goto out; + d = kcalloc(1, sizeof *d, GFP_ATOMIC); + if (!d) + goto out; + INIT_WORK(&d->work, aoecmd_sleepwork); + spin_lock_init(&d->lock); + init_timer(&d->timer); + d->timer.data = (ulong) d; + d->timer.function = dummy_timer; + d->timer.expires = jiffies + 
HZ; + add_timer(&d->timer); + d->bufpool = NULL; /* defer to aoeblk_gdalloc */ + d->tgt = d->targets; + INIT_LIST_HEAD(&d->bufq); + d->sysminor = sysminor; + d->aoemajor = AOEMAJOR(sysminor); + d->aoeminor = AOEMINOR(sysminor); + d->mintimer = MINTIMER; + d->next = devlist; + devlist = d; + out: spin_unlock_irqrestore(&devlist_lock, flags); return d; } static void -aoedev_freedev(struct aoedev *d) +freetgt(struct aoedev *d, struct aoetgt *t) { struct frame *f, *e; - if (d->gd) { - aoedisk_rm_sysfs(d); - del_gendisk(d->gd); - put_disk(d->gd); - } - f = d->frames; - e = f + d->nframes; - for (; f<e; f++) { - skb_shinfo(f->skb)->nr_frags = 0; - dev_kfree_skb(f->skb); - } - kfree(d->frames); - if (d->bufpool) - mempool_destroy(d->bufpool); - kfree(d); + f = t->frames; + e = f + t->nframes; + for (; f < e; f++) + skbfree(f->skb); + kfree(t->frames); + kfree(t); } void @@ -214,7 +291,5 @@ aoedev_exit(void) int __init aoedev_init(void) { - spin_lock_init(&devlist_lock); return 0; } - diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c index a04b7d61329..7b15a5e9cec 100644 --- a/drivers/block/aoe/aoemain.c +++ b/drivers/block/aoe/aoemain.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. */ /* * aoemain.c * Module initialization routines, discover timer diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 4e6deb7f5c2..8460ef736d5 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */ +/* Copyright (c) 2007 Coraid, Inc. See COPYING for GPL terms. 
*/ /* * aoenet.c * Ethernet portion of AoE driver @@ -83,7 +83,7 @@ set_aoe_iflist(const char __user *user_str, size_t size) return 0; } -u64 +unsigned long long mac_addr(char addr[6]) { __be64 n = 0; @@ -91,7 +91,7 @@ mac_addr(char addr[6]) memcpy(p + 2, addr, 6); /* (sizeof addr != 6) */ - return __be64_to_cpu(n); + return (unsigned long long) __be64_to_cpu(n); } void @@ -137,9 +137,12 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, if (n > NECODES) n = 0; if (net_ratelimit()) - printk(KERN_ERR "aoe: error packet from %d.%d; ecode=%d '%s'\n", - be16_to_cpu(get_unaligned(&h->major)), h->minor, - h->err, aoe_errlist[n]); + printk(KERN_ERR + "%s%d.%d@%s; ecode=%d '%s'\n", + "aoe: error packet from ", + be16_to_cpu(get_unaligned(&h->major)), + h->minor, skb->dev->name, + h->err, aoe_errlist[n]); goto exit; } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 94268c75d04..424995073c6 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -90,7 +90,7 @@ static struct atari_disk_type { unsigned blocks; /* total number of blocks */ unsigned fdc_speed; /* fdc_speed setting */ unsigned stretch; /* track doubling ? */ -} disk_type[] = { +} atari_disk_type[] = { { "d360", 9, 720, 0, 0}, /* 0: 360kB diskette */ { "D360", 9, 720, 0, 1}, /* 1: 360kb in 720k or 1.2MB drive */ { "D720", 9,1440, 0, 0}, /* 2: 720kb in 720k or 1.2MB drive */ @@ -658,7 +658,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) return -EINVAL; } type = minor2disktype[type].index; - UDT = &disk_type[type]; + UDT = &atari_disk_type[type]; } if (!UDT || desc->track >= UDT->blocks/UDT->spt/2 || desc->head >= 2) { @@ -1064,7 +1064,7 @@ static void fd_rwsec_done1(int status) searched for a non-existent sector! 
*/ !(read_track && FDC_READ(FDCREG_SECTOR) > SUDT->spt)) { if (Probing) { - if (SUDT > disk_type) { + if (SUDT > atari_disk_type) { if (SUDT[-1].blocks > ReqBlock) { /* try another disk type */ SUDT--; @@ -1082,7 +1082,7 @@ static void fd_rwsec_done1(int status) } else { /* record not found, but not probing. Maybe stretch wrong ? Restart probing */ if (SUD.autoprobe) { - SUDT = disk_type + StartDiskType[DriveType]; + SUDT = atari_disk_type + StartDiskType[DriveType]; set_capacity(unit[SelectedDrive].disk, SUDT->blocks); Probing = 1; @@ -1421,7 +1421,7 @@ repeat: if (type == 0) { if (!UDT) { Probing = 1; - UDT = disk_type + StartDiskType[DriveType]; + UDT = atari_disk_type + StartDiskType[DriveType]; set_capacity(floppy->disk, UDT->blocks); UD.autoprobe = 1; } @@ -1439,7 +1439,7 @@ repeat: goto repeat; } type = minor2disktype[type].index; - UDT = &disk_type[type]; + UDT = &atari_disk_type[type]; set_capacity(floppy->disk, UDT->blocks); UD.autoprobe = 0; } @@ -1505,7 +1505,7 @@ static int fd_ioctl(struct inode *inode, struct file *filp, if (minor2disktype[type].drive_types > DriveType) return -ENODEV; type = minor2disktype[type].index; - dtp = &disk_type[type]; + dtp = &atari_disk_type[type]; if (UD.flags & FTD_MSG) printk (KERN_ERR "floppy%d: found dtp %p name %s!\n", drive, dtp, dtp->name); @@ -1576,7 +1576,7 @@ static int fd_ioctl(struct inode *inode, struct file *filp, continue; } setidx = minor2disktype[settype].index; - dtp = &disk_type[setidx]; + dtp = &atari_disk_type[setidx]; /* found matching entry ?? */ if ( dtp->blocks == setprm.size diff --git a/drivers/block/brd.c b/drivers/block/brd.c new file mode 100644 index 00000000000..85364804364 --- /dev/null +++ b/drivers/block/brd.c @@ -0,0 +1,583 @@ +/* + * Ram backed block device driver. + * + * Copyright (C) 2007 Nick Piggin + * Copyright (C) 2007 Novell Inc. + * + * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright + * of their respective owners. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/major.h> +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/radix-tree.h> +#include <linux/buffer_head.h> /* invalidate_bh_lrus() */ + +#include <asm/uaccess.h> + +#define SECTOR_SHIFT 9 +#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) + +/* + * Each block ramdisk device has a radix_tree brd_pages of pages that stores + * the pages containing the block device's contents. A brd page's ->index is + * its offset in PAGE_SIZE units. This is similar to, but in no way connected + * with, the kernel's pagecache or buffer cache (which sit above our block + * device). + */ +struct brd_device { + int brd_number; + int brd_refcnt; + loff_t brd_offset; + loff_t brd_sizelimit; + unsigned brd_blocksize; + + struct request_queue *brd_queue; + struct gendisk *brd_disk; + struct list_head brd_list; + + /* + * Backing store of pages and lock to protect it. This is the contents + * of the block device. + */ + spinlock_t brd_lock; + struct radix_tree_root brd_pages; +}; + +/* + * Look up and return a brd's page for a given sector. + */ +static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) +{ + pgoff_t idx; + struct page *page; + + /* + * The page lifetime is protected by the fact that we have opened the + * device node -- brd pages will never be deleted under us, so we + * don't need any further locking or refcounting. + * + * This is strictly true for the radix-tree nodes as well (ie. we + * don't actually need the rcu_read_lock()), however that is not a + * documented feature of the radix-tree API so it is better to be + * safe here (we don't have total exclusion from radix tree updates + * here, only deletes). 
+ */ + rcu_read_lock(); + idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ + page = radix_tree_lookup(&brd->brd_pages, idx); + rcu_read_unlock(); + + BUG_ON(page && page->index != idx); + + return page; +} + +/* + * Look up and return a brd's page for a given sector. + * If one does not exist, allocate an empty page, and insert that. Then + * return it. + */ +static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) +{ + pgoff_t idx; + struct page *page; + gfp_t gfp_flags; + + page = brd_lookup_page(brd, sector); + if (page) + return page; + + /* + * Must use NOIO because we don't want to recurse back into the + * block or filesystem layers from page reclaim. + * + * Cannot support XIP and highmem, because our ->direct_access + * routine for XIP must return memory that is always addressable. + * If XIP was reworked to use pfns and kmap throughout, this + * restriction might be able to be lifted. + */ + gfp_flags = GFP_NOIO | __GFP_ZERO; +#ifndef CONFIG_BLK_DEV_XIP + gfp_flags |= __GFP_HIGHMEM; +#endif + page = alloc_page(gfp_flags); + if (!page) + return NULL; + + if (radix_tree_preload(GFP_NOIO)) { + __free_page(page); + return NULL; + } + + spin_lock(&brd->brd_lock); + idx = sector >> PAGE_SECTORS_SHIFT; + if (radix_tree_insert(&brd->brd_pages, idx, page)) { + __free_page(page); + page = radix_tree_lookup(&brd->brd_pages, idx); + BUG_ON(!page); + BUG_ON(page->index != idx); + } else + page->index = idx; + spin_unlock(&brd->brd_lock); + + radix_tree_preload_end(); + + return page; +} + +/* + * Free all backing store pages and radix tree. This must only be called when + * there are no other users of the device. 
+ */ +#define FREE_BATCH 16 +static void brd_free_pages(struct brd_device *brd) +{ + unsigned long pos = 0; + struct page *pages[FREE_BATCH]; + int nr_pages; + + do { + int i; + + nr_pages = radix_tree_gang_lookup(&brd->brd_pages, + (void **)pages, pos, FREE_BATCH); + + for (i = 0; i < nr_pages; i++) { + void *ret; + + BUG_ON(pages[i]->index < pos); + pos = pages[i]->index; + ret = radix_tree_delete(&brd->brd_pages, pos); + BUG_ON(!ret || ret != pages[i]); + __free_page(pages[i]); + } + + pos++; + + /* + * This assumes radix_tree_gang_lookup always returns as + * many pages as possible. If the radix-tree code changes, + * so will this have to. + */ + } while (nr_pages == FREE_BATCH); +} + +/* + * copy_to_brd_setup must be called before copy_to_brd. It may sleep. + */ +static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) +{ + unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; + size_t copy; + + copy = min_t(size_t, n, PAGE_SIZE - offset); + if (!brd_insert_page(brd, sector)) + return -ENOMEM; + if (copy < n) { + sector += copy >> SECTOR_SHIFT; + if (!brd_insert_page(brd, sector)) + return -ENOMEM; + } + return 0; +} + +/* + * Copy n bytes from src to the brd starting at sector. Does not sleep. + */ +static void copy_to_brd(struct brd_device *brd, const void *src, + sector_t sector, size_t n) +{ + struct page *page; + void *dst; + unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; + size_t copy; + + copy = min_t(size_t, n, PAGE_SIZE - offset); + page = brd_lookup_page(brd, sector); + BUG_ON(!page); + + dst = kmap_atomic(page, KM_USER1); + memcpy(dst + offset, src, copy); + kunmap_atomic(dst, KM_USER1); + + if (copy < n) { + src += copy; + sector += copy >> SECTOR_SHIFT; + copy = n - copy; + page = brd_lookup_page(brd, sector); + BUG_ON(!page); + + dst = kmap_atomic(page, KM_USER1); + memcpy(dst, src, copy); + kunmap_atomic(dst, KM_USER1); + } +} + +/* + * Copy n bytes to dst from the brd starting at sector. 
Does not sleep. + */ +static void copy_from_brd(void *dst, struct brd_device *brd, + sector_t sector, size_t n) +{ + struct page *page; + void *src; + unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; + size_t copy; + + copy = min_t(size_t, n, PAGE_SIZE - offset); + page = brd_lookup_page(brd, sector); + if (page) { + src = kmap_atomic(page, KM_USER1); + memcpy(dst, src + offset, copy); + kunmap_atomic(src, KM_USER1); + } else + memset(dst, 0, copy); + + if (copy < n) { + dst += copy; + sector += copy >> SECTOR_SHIFT; + copy = n - copy; + page = brd_lookup_page(brd, sector); + if (page) { + src = kmap_atomic(page, KM_USER1); + memcpy(dst, src, copy); + kunmap_atomic(src, KM_USER1); + } else + memset(dst, 0, copy); + } +} + +/* + * Process a single bvec of a bio. + */ +static int brd_do_bvec(struct brd_device *brd, struct page *page, + unsigned int len, unsigned int off, int rw, + sector_t sector) +{ + void *mem; + int err = 0; + + if (rw != READ) { + err = copy_to_brd_setup(brd, sector, len); + if (err) + goto out; + } + + mem = kmap_atomic(page, KM_USER0); + if (rw == READ) { + copy_from_brd(mem + off, brd, sector, len); + flush_dcache_page(page); + } else + copy_to_brd(brd, mem + off, sector, len); + kunmap_atomic(mem, KM_USER0); + +out: + return err; +} + +static int brd_make_request(struct request_queue *q, struct bio *bio) +{ + struct block_device *bdev = bio->bi_bdev; + struct brd_device *brd = bdev->bd_disk->private_data; + int rw; + struct bio_vec *bvec; + sector_t sector; + int i; + int err = -EIO; + + sector = bio->bi_sector; + if (sector + (bio->bi_size >> SECTOR_SHIFT) > + get_capacity(bdev->bd_disk)) + goto out; + + rw = bio_rw(bio); + if (rw == READA) + rw = READ; + + bio_for_each_segment(bvec, bio, i) { + unsigned int len = bvec->bv_len; + err = brd_do_bvec(brd, bvec->bv_page, len, + bvec->bv_offset, rw, sector); + if (err) + break; + sector += len >> SECTOR_SHIFT; + } + +out: + bio_endio(bio, err); + + return 0; +} + +#ifdef 
CONFIG_BLK_DEV_XIP +static int brd_direct_access (struct block_device *bdev, sector_t sector, + unsigned long *data) +{ + struct brd_device *brd = bdev->bd_disk->private_data; + struct page *page; + + if (!brd) + return -ENODEV; + if (sector & (PAGE_SECTORS-1)) + return -EINVAL; + if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk)) + return -ERANGE; + page = brd_insert_page(brd, sector); + if (!page) + return -ENOMEM; + *data = (unsigned long)page_address(page); + + return 0; +} +#endif + +static int brd_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + int error; + struct block_device *bdev = inode->i_bdev; + struct brd_device *brd = bdev->bd_disk->private_data; + + if (cmd != BLKFLSBUF) + return -ENOTTY; + + /* + * ram device BLKFLSBUF has special semantics, we want to actually + * release and destroy the ramdisk data. + */ + mutex_lock(&bdev->bd_mutex); + error = -EBUSY; + if (bdev->bd_openers <= 1) { + /* + * Invalidate the cache first, so it isn't written + * back to the device. + * + * Another thread might instantiate more buffercache here, + * but there is not much we can do to close that race. + */ + invalidate_bh_lrus(); + truncate_inode_pages(bdev->bd_inode->i_mapping, 0); + brd_free_pages(brd); + error = 0; + } + mutex_unlock(&bdev->bd_mutex); + + return error; +} + +static struct block_device_operations brd_fops = { + .owner = THIS_MODULE, + .ioctl = brd_ioctl, +#ifdef CONFIG_BLK_DEV_XIP + .direct_access = brd_direct_access, +#endif +}; + +/* + * And now the modules code and kernel interface. 
+ */ +static int rd_nr; +int rd_size = CONFIG_BLK_DEV_RAM_SIZE; +module_param(rd_nr, int, 0); +MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); +module_param(rd_size, int, 0); +MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); + +#ifndef MODULE +/* Legacy boot options - nonmodular */ +static int __init ramdisk_size(char *str) +{ + rd_size = simple_strtol(str, NULL, 0); + return 1; +} +static int __init ramdisk_size2(char *str) +{ + return ramdisk_size(str); +} +__setup("ramdisk=", ramdisk_size); +__setup("ramdisk_size=", ramdisk_size2); +#endif + +/* + * The device scheme is derived from loop.c. Keep them in synch where possible + * (should share code eventually). + */ +static LIST_HEAD(brd_devices); +static DEFINE_MUTEX(brd_devices_mutex); + +static struct brd_device *brd_alloc(int i) +{ + struct brd_device *brd; + struct gendisk *disk; + + brd = kzalloc(sizeof(*brd), GFP_KERNEL); + if (!brd) + goto out; + brd->brd_number = i; + spin_lock_init(&brd->brd_lock); + INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC); + + brd->brd_queue = blk_alloc_queue(GFP_KERNEL); + if (!brd->brd_queue) + goto out_free_dev; + blk_queue_make_request(brd->brd_queue, brd_make_request); + blk_queue_max_sectors(brd->brd_queue, 1024); + blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); + + disk = brd->brd_disk = alloc_disk(1); + if (!disk) + goto out_free_queue; + disk->major = RAMDISK_MAJOR; + disk->first_minor = i; + disk->fops = &brd_fops; + disk->private_data = brd; + disk->queue = brd->brd_queue; + sprintf(disk->disk_name, "ram%d", i); + set_capacity(disk, rd_size * 2); + + return brd; + +out_free_queue: + blk_cleanup_queue(brd->brd_queue); +out_free_dev: + kfree(brd); +out: + return NULL; +} + +static void brd_free(struct brd_device *brd) +{ + put_disk(brd->brd_disk); + blk_cleanup_queue(brd->brd_queue); + brd_free_pages(brd); + kfree(brd); +} + +static struct brd_device *brd_init_one(int i) 
+{ + struct brd_device *brd; + + list_for_each_entry(brd, &brd_devices, brd_list) { + if (brd->brd_number == i) + goto out; + } + + brd = brd_alloc(i); + if (brd) { + add_disk(brd->brd_disk); + list_add_tail(&brd->brd_list, &brd_devices); + } +out: + return brd; +} + +static void brd_del_one(struct brd_device *brd) +{ + list_del(&brd->brd_list); + del_gendisk(brd->brd_disk); + brd_free(brd); +} + +static struct kobject *brd_probe(dev_t dev, int *part, void *data) +{ + struct brd_device *brd; + struct kobject *kobj; + + mutex_lock(&brd_devices_mutex); + brd = brd_init_one(dev & MINORMASK); + kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM); + mutex_unlock(&brd_devices_mutex); + + *part = 0; + return kobj; +} + +static int __init brd_init(void) +{ + int i, nr; + unsigned long range; + struct brd_device *brd, *next; + + /* + * brd module now has a feature to instantiate underlying device + * structure on-demand, provided that there is an access dev node. + * However, this will not work well with user space tool that doesn't + * know about such "feature". In order to not break any existing + * tool, we do the following: + * + * (1) if rd_nr is specified, create that many upfront, and this + * also becomes a hard limit. + * (2) if rd_nr is not specified, create 1 rd device on module + * load, user can further extend brd device by create dev node + * themselves and have kernel automatically instantiate actual + * device on-demand. 
+ */ + if (rd_nr > 1UL << MINORBITS) + return -EINVAL; + + if (rd_nr) { + nr = rd_nr; + range = rd_nr; + } else { + nr = CONFIG_BLK_DEV_RAM_COUNT; + range = 1UL << MINORBITS; + } + + if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) + return -EIO; + + for (i = 0; i < nr; i++) { + brd = brd_alloc(i); + if (!brd) + goto out_free; + list_add_tail(&brd->brd_list, &brd_devices); + } + + /* point of no return */ + + list_for_each_entry(brd, &brd_devices, brd_list) + add_disk(brd->brd_disk); + + blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range, + THIS_MODULE, brd_probe, NULL, NULL); + + printk(KERN_INFO "brd: module loaded\n"); + return 0; + +out_free: + list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { + list_del(&brd->brd_list); + brd_free(brd); + } + + unregister_blkdev(RAMDISK_MAJOR, "brd"); + return -ENOMEM; +} + +static void __exit brd_exit(void) +{ + unsigned long range; + struct brd_device *brd, *next; + + range = rd_nr ? rd_nr : 1UL << MINORBITS; + + list_for_each_entry_safe(brd, next, &brd_devices, brd_list) + brd_del_one(brd); + + blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range); + unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); +} + +module_init(brd_init); +module_exit(brd_exit); + diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 855ce8e5efb..9715be3f248 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2630,12 +2630,14 @@ static void do_cciss_request(struct request_queue *q) c->Request.CDB[8] = creq->nr_sectors & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; } else { + u32 upper32 = upper_32_bits(start_blk); + c->Request.CDBLen = 16; c->Request.CDB[1]= 0; - c->Request.CDB[2]= (start_blk >> 56) & 0xff; //MSB - c->Request.CDB[3]= (start_blk >> 48) & 0xff; - c->Request.CDB[4]= (start_blk >> 40) & 0xff; - c->Request.CDB[5]= (start_blk >> 32) & 0xff; + c->Request.CDB[2]= (upper32 >> 24) & 0xff; //MSB + c->Request.CDB[3]= (upper32 >> 16) & 0xff; + c->Request.CDB[4]= (upper32 >> 8) & 0xff; + 
c->Request.CDB[5]= upper32 & 0xff; c->Request.CDB[6]= (start_blk >> 24) & 0xff; c->Request.CDB[7]= (start_blk >> 16) & 0xff; c->Request.CDB[8]= (start_blk >> 8) & 0xff; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b8af22e610d..91ebb007416 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -973,6 +973,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->transfer = xfer->transfer; lo->ioctl = xfer->ioctl; + if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != + (info->lo_flags & LO_FLAGS_AUTOCLEAR)) + lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; + lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1331,6 +1335,10 @@ static int lo_release(struct inode *inode, struct file *file) mutex_lock(&lo->lo_ctl_mutex); --lo->lo_refcnt; + + if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) && !lo->lo_refcnt) + loop_clr_fd(lo, inode->i_bdev); + mutex_unlock(&lo->lo_ctl_mutex); return 0; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ae3106045ee..018753c59b8 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -54,7 +54,7 @@ static unsigned int debugflags; #endif /* NDEBUG */ static unsigned int nbds_max = 16; -static struct nbd_device nbd_dev[MAX_NBD]; +static struct nbd_device *nbd_dev; /* * Use just one lock (or at most 1 per NIC). 
Two arguments for this: @@ -649,11 +649,9 @@ static int __init nbd_init(void) BUILD_BUG_ON(sizeof(struct nbd_request) != 28); - if (nbds_max > MAX_NBD) { - printk(KERN_CRIT "nbd: cannot allocate more than %u nbds; %u requested.\n", MAX_NBD, - nbds_max); - return -EINVAL; - } + nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); + if (!nbd_dev) + return -ENOMEM; for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1); diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c index 76096cad798..8b9549ab4a4 100644 --- a/drivers/block/paride/pt.c +++ b/drivers/block/paride/pt.c @@ -660,7 +660,7 @@ static int pt_open(struct inode *inode, struct file *file) pt_identify(tape); err = -ENODEV; - if (!tape->flags & PT_MEDIA) + if (!(tape->flags & PT_MEDIA)) goto out; err = -EROFS; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index e9de1712e5a..674cd66dcab 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2212,11 +2212,11 @@ static int pkt_media_speed(struct pktcdvd_device *pd, unsigned *speed) return ret; } - if (!buf[6] & 0x40) { + if (!(buf[6] & 0x40)) { printk(DRIVER_NAME": Disc type is not CD-RW\n"); return 1; } - if (!buf[6] & 0x4) { + if (!(buf[6] & 0x4)) { printk(DRIVER_NAME": A1 values on media are not valid, maybe not CDRW?\n"); return 1; } diff --git a/drivers/block/rd.c b/drivers/block/rd.c deleted file mode 100644 index 82f4eecc869..00000000000 --- a/drivers/block/rd.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. - * - * (C) Chad Page, Theodore Ts'o, et. al, 1995. - * - * This RAM disk is designed to have filesystems created on it and mounted - * just like a regular floppy disk. - * - * It also does something suggested by Linus: use the buffer cache as the - * RAM disk data. This makes it possible to dynamically allocate the RAM disk - * buffer - with some consequences I have to deal with as I write this. 
- * - * This code is based on the original ramdisk.c, written mostly by - * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by - * Chad Page to use the buffer cache to store the RAM disk data in - * 1995; Theodore then took over the driver again, and cleaned it up - * for inclusion in the mainline kernel. - * - * The original CRAMDISK code was written by Richard Lyons, and - * adapted by Chad Page to use the new RAM disk interface. Theodore - * Ts'o rewrote it so that both the compressed RAM disk loader and the - * kernel decompressor uses the same inflate.c codebase. The RAM disk - * loader now also loads into a dynamic (buffer cache based) RAM disk, - * not the old static RAM disk. Support for the old static RAM disk has - * been completely removed. - * - * Loadable module support added by Tom Dyas. - * - * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): - * Cosmetic changes in #ifdef MODULE, code movement, etc. - * When the RAM disk module is removed, free the protected buffers - * Default RAM disk size changed to 2.88 MB - * - * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 - * - * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) - * - Chad Page - * - * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 - * - * Make block size and block size shift for RAM disks a global macro - * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 - */ - -#include <linux/string.h> -#include <linux/slab.h> -#include <asm/atomic.h> -#include <linux/bio.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/init.h> -#include <linux/pagemap.h> -#include <linux/blkdev.h> -#include <linux/genhd.h> -#include <linux/buffer_head.h> /* for invalidate_bdev() */ -#include <linux/backing-dev.h> -#include <linux/blkpg.h> -#include <linux/writeback.h> - -#include <asm/uaccess.h> - -/* Various static variables go here. Most are used only in the RAM disk code. 
- */ - -static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; -static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */ -static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT]; - -/* - * Parameters for the boot-loading of the RAM disk. These are set by - * init/main.c (from arguments to the kernel command line) or from the - * architecture-specific setup routine (from the stored boot sector - * information). - */ -int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ -/* - * It would be very desirable to have a soft-blocksize (that in the case - * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because - * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of - * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages - * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only - * 1 page will be protected. Depending on the size of the ramdisk you - * may want to change the ramdisk blocksize to achieve a better or worse MM - * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that - * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). - */ -static int rd_blocksize = CONFIG_BLK_DEV_RAM_BLOCKSIZE; - -/* - * Copyright (C) 2000 Linus Torvalds. - * 2000 Transmeta Corp. - * aops copied from ramfs. - */ - -/* - * If a ramdisk page has buffers, some may be uptodate and some may be not. - * To bring the page uptodate we zero out the non-uptodate buffers. The - * page must be locked. - */ -static void make_page_uptodate(struct page *page) -{ - if (page_has_buffers(page)) { - struct buffer_head *bh = page_buffers(page); - struct buffer_head *head = bh; - - do { - if (!buffer_uptodate(bh)) { - memset(bh->b_data, 0, bh->b_size); - /* - * akpm: I'm totally undecided about this. 
The - * buffer has just been magically brought "up to - * date", but nobody should want to be reading - * it anyway, because it hasn't been used for - * anything yet. It is still in a "not read - * from disk yet" state. - * - * But non-uptodate buffers against an uptodate - * page are against the rules. So do it anyway. - */ - set_buffer_uptodate(bh); - } - } while ((bh = bh->b_this_page) != head); - } else { - memset(page_address(page), 0, PAGE_CACHE_SIZE); - } - flush_dcache_page(page); - SetPageUptodate(page); -} - -static int ramdisk_readpage(struct file *file, struct page *page) -{ - if (!PageUptodate(page)) - make_page_uptodate(page); - unlock_page(page); - return 0; -} - -static int ramdisk_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) -{ - if (!PageUptodate(page)) - make_page_uptodate(page); - return 0; -} - -static int ramdisk_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) -{ - set_page_dirty(page); - return 0; -} - -/* - * ->writepage to the blockdev's mapping has to redirty the page so that the - * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM - * won't try to (pointlessly) write the page again for a while. - * - * Really, these pages should not be on the LRU at all. - */ -static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) -{ - if (!PageUptodate(page)) - make_page_uptodate(page); - SetPageDirty(page); - if (wbc->for_reclaim) - return AOP_WRITEPAGE_ACTIVATE; - unlock_page(page); - return 0; -} - -/* - * This is a little speedup thing: short-circuit attempts to write back the - * ramdisk blockdev inode to its non-existent backing store. - */ -static int ramdisk_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - return 0; -} - -/* - * ramdisk blockdev pages have their own ->set_page_dirty() because we don't - * want them to contribute to dirty memory accounting. 
- */ -static int ramdisk_set_page_dirty(struct page *page) -{ - if (!TestSetPageDirty(page)) - return 1; - return 0; -} - -/* - * releasepage is called by pagevec_strip/try_to_release_page if - * buffers_heads_over_limit is true. Without a releasepage function - * try_to_free_buffers is called instead. That can unset the dirty - * bit of our ram disk pages, which will be eventually freed, even - * if the page is still in use. - */ -static int ramdisk_releasepage(struct page *page, gfp_t dummy) -{ - return 0; -} - -static const struct address_space_operations ramdisk_aops = { - .readpage = ramdisk_readpage, - .prepare_write = ramdisk_prepare_write, - .commit_write = ramdisk_commit_write, - .writepage = ramdisk_writepage, - .set_page_dirty = ramdisk_set_page_dirty, - .writepages = ramdisk_writepages, - .releasepage = ramdisk_releasepage, -}; - -static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, - struct address_space *mapping) -{ - pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); - unsigned int vec_offset = vec->bv_offset; - int offset = (sector << 9) & ~PAGE_CACHE_MASK; - int size = vec->bv_len; - int err = 0; - - do { - int count; - struct page *page; - char *src; - char *dst; - - count = PAGE_CACHE_SIZE - offset; - if (count > size) - count = size; - size -= count; - - page = grab_cache_page(mapping, index); - if (!page) { - err = -ENOMEM; - goto out; - } - - if (!PageUptodate(page)) - make_page_uptodate(page); - - index++; - - if (rw == READ) { - src = kmap_atomic(page, KM_USER0) + offset; - dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; - } else { - src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; - dst = kmap_atomic(page, KM_USER1) + offset; - } - offset = 0; - vec_offset += count; - - memcpy(dst, src, count); - - kunmap_atomic(src, KM_USER0); - kunmap_atomic(dst, KM_USER1); - - if (rw == READ) - flush_dcache_page(vec->bv_page); - else - set_page_dirty(page); - unlock_page(page); - put_page(page); - } while (size); 
- - out: - return err; -} - -/* - * Basically, my strategy here is to set up a buffer-head which can't be - * deleted, and make that my Ramdisk. If the request is outside of the - * allocated size, we must get rid of it... - * - * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support - * - */ -static int rd_make_request(struct request_queue *q, struct bio *bio) -{ - struct block_device *bdev = bio->bi_bdev; - struct address_space * mapping = bdev->bd_inode->i_mapping; - sector_t sector = bio->bi_sector; - unsigned long len = bio->bi_size >> 9; - int rw = bio_data_dir(bio); - struct bio_vec *bvec; - int ret = 0, i; - - if (sector + len > get_capacity(bdev->bd_disk)) - goto fail; - - if (rw==READA) - rw=READ; - - bio_for_each_segment(bvec, bio, i) { - ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping); - sector += bvec->bv_len >> 9; - } - if (ret) - goto fail; - - bio_endio(bio, 0); - return 0; -fail: - bio_io_error(bio); - return 0; -} - -static int rd_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - int error; - struct block_device *bdev = inode->i_bdev; - - if (cmd != BLKFLSBUF) - return -ENOTTY; - - /* - * special: we want to release the ramdisk memory, it's not like with - * the other blockdevices where this ioctl only flushes away the buffer - * cache - */ - error = -EBUSY; - mutex_lock(&bdev->bd_mutex); - if (bdev->bd_openers <= 2) { - truncate_inode_pages(bdev->bd_inode->i_mapping, 0); - error = 0; - } - mutex_unlock(&bdev->bd_mutex); - return error; -} - -/* - * This is the backing_dev_info for the blockdev inode itself. It doesn't need - * writeback and it does not contribute to dirty memory accounting. 
- */ -static struct backing_dev_info rd_backing_dev_info = { - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY, - .unplug_io_fn = default_unplug_io_fn, -}; - -/* - * This is the backing_dev_info for the files which live atop the ramdisk - * "device". These files do need writeback and they do contribute to dirty - * memory accounting. - */ -static struct backing_dev_info rd_file_backing_dev_info = { - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */ - .unplug_io_fn = default_unplug_io_fn, -}; - -static int rd_open(struct inode *inode, struct file *filp) -{ - unsigned unit = iminor(inode); - - if (rd_bdev[unit] == NULL) { - struct block_device *bdev = inode->i_bdev; - struct address_space *mapping; - unsigned bsize; - gfp_t gfp_mask; - - inode = igrab(bdev->bd_inode); - rd_bdev[unit] = bdev; - bdev->bd_openers++; - bsize = bdev_hardsect_size(bdev); - bdev->bd_block_size = bsize; - inode->i_blkbits = blksize_bits(bsize); - inode->i_size = get_capacity(bdev->bd_disk)<<9; - - mapping = inode->i_mapping; - mapping->a_ops = &ramdisk_aops; - mapping->backing_dev_info = &rd_backing_dev_info; - bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info; - - /* - * Deep badness. rd_blkdev_pagecache_IO() needs to allocate - * pagecache pages within a request_fn. We cannot recur back - * into the filesystem which is mounted atop the ramdisk, because - * that would deadlock on fs locks. And we really don't want - * to reenter rd_blkdev_pagecache_IO when we're already within - * that function. - * - * So we turn off __GFP_FS and __GFP_IO. - * - * And to give this thing a hope of working, turn on __GFP_HIGH. - * Hopefully, there's enough regular memory allocation going on - * for the page allocator emergency pools to keep the ramdisk - * driver happy. 
- */ - gfp_mask = mapping_gfp_mask(mapping); - gfp_mask &= ~(__GFP_FS|__GFP_IO); - gfp_mask |= __GFP_HIGH; - mapping_set_gfp_mask(mapping, gfp_mask); - } - - return 0; -} - -static struct block_device_operations rd_bd_op = { - .owner = THIS_MODULE, - .open = rd_open, - .ioctl = rd_ioctl, -}; - -/* - * Before freeing the module, invalidate all of the protected buffers! - */ -static void __exit rd_cleanup(void) -{ - int i; - - for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { - struct block_device *bdev = rd_bdev[i]; - rd_bdev[i] = NULL; - if (bdev) { - invalidate_bdev(bdev); - blkdev_put(bdev); - } - del_gendisk(rd_disks[i]); - put_disk(rd_disks[i]); - blk_cleanup_queue(rd_queue[i]); - } - unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); - - bdi_destroy(&rd_file_backing_dev_info); - bdi_destroy(&rd_backing_dev_info); -} - -/* - * This is the registration and initialization section of the RAM disk driver - */ -static int __init rd_init(void) -{ - int i; - int err; - - err = bdi_init(&rd_backing_dev_info); - if (err) - goto out2; - - err = bdi_init(&rd_file_backing_dev_info); - if (err) { - bdi_destroy(&rd_backing_dev_info); - goto out2; - } - - err = -ENOMEM; - - if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || - (rd_blocksize & (rd_blocksize-1))) { - printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", - rd_blocksize); - rd_blocksize = BLOCK_SIZE; - } - - for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { - rd_disks[i] = alloc_disk(1); - if (!rd_disks[i]) - goto out; - - rd_queue[i] = blk_alloc_queue(GFP_KERNEL); - if (!rd_queue[i]) { - put_disk(rd_disks[i]); - goto out; - } - } - - if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) { - err = -EIO; - goto out; - } - - for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { - struct gendisk *disk = rd_disks[i]; - - blk_queue_make_request(rd_queue[i], &rd_make_request); - blk_queue_hardsect_size(rd_queue[i], rd_blocksize); - - /* rd_size is given in kB */ - disk->major = RAMDISK_MAJOR; - disk->first_minor = i; - 
disk->fops = &rd_bd_op; - disk->queue = rd_queue[i]; - disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; - sprintf(disk->disk_name, "ram%d", i); - set_capacity(disk, rd_size * 2); - add_disk(rd_disks[i]); - } - - /* rd_size is given in kB */ - printk("RAMDISK driver initialized: " - "%d RAM disks of %dK size %d blocksize\n", - CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize); - - return 0; -out: - while (i--) { - put_disk(rd_disks[i]); - blk_cleanup_queue(rd_queue[i]); - } - bdi_destroy(&rd_backing_dev_info); - bdi_destroy(&rd_file_backing_dev_info); -out2: - return err; -} - -module_init(rd_init); -module_exit(rd_cleanup); - -/* options - nonmodular */ -#ifndef MODULE -static int __init ramdisk_size(char *str) -{ - rd_size = simple_strtol(str,NULL,0); - return 1; -} -static int __init ramdisk_blocksize(char *str) -{ - rd_blocksize = simple_strtol(str,NULL,0); - return 1; -} -__setup("ramdisk_size=", ramdisk_size); -__setup("ramdisk_blocksize=", ramdisk_blocksize); -#endif - -/* options - modular */ -module_param(rd_size, int, 0); -MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); -module_param(rd_blocksize, int, 0); -MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); -MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); - -MODULE_LICENSE("GPL"); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index a70c1c29a7a..c452e2d355e 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -657,7 +657,6 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq) if ((cmd = ub_get_cmd(lun)) == NULL) return -1; memset(cmd, 0, sizeof(struct ub_scsi_cmd)); - sg_init_table(cmd->sgv, UB_MAX_REQ_SG); blkdev_dequeue_request(rq); @@ -668,6 +667,7 @@ static int ub_request_fn_1(struct ub_lun *lun, struct request *rq) /* * get scatterlist from block layer */ + sg_init_table(&urq->sgv[0], UB_MAX_REQ_SG); n_elem = blk_rq_map_sg(lun->disk->queue, rq, &urq->sgv[0]); if (n_elem < 0) { /* Impossible, because blk_rq_map_sg should not hit ENOMEM. 
*/ diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 78ebfffc77e..4a7a059ebaf 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1202,8 +1202,10 @@ static int __devexit ace_of_remove(struct of_device *op) } /* Match table for of_platform binding */ -static struct of_device_id __devinit ace_of_match[] = { - { .compatible = "xilinx,xsysace", }, +static struct of_device_id ace_of_match[] __devinitdata = { + { .compatible = "xlnx,opb-sysace-1.00.b", }, + { .compatible = "xlnx,opb-sysace-1.00.c", }, + { .compatible = "xlnx,xps-sysace-1.00.a", }, {}, }; MODULE_DEVICE_TABLE(of, ace_of_match); |