Diffstat (limited to 'block')
 block/cfq-iosched.c | 151
 block/elevator.c    | 114
 block/ll_rw_blk.c   |  86
 block/scsi_ioctl.c  |   3
 4 files changed, 161 insertions(+), 193 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 74fae2daf87..c8dbe38c81c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -239,7 +239,6 @@ enum cfqq_state_flags {
CFQ_CFQQ_FLAG_fifo_expire,
CFQ_CFQQ_FLAG_idle_window,
CFQ_CFQQ_FLAG_prio_changed,
- CFQ_CFQQ_FLAG_expired,
};
#define CFQ_CFQQ_FNS(name) \
@@ -264,7 +263,6 @@ CFQ_CFQQ_FNS(must_dispatch);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
CFQ_CFQQ_FNS(prio_changed);
-CFQ_CFQQ_FNS(expired);
#undef CFQ_CFQQ_FNS
enum cfq_rq_state_flags {
@@ -336,7 +334,7 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
*/
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
- if (!cfqd->rq_in_driver && cfqd->busy_queues)
+ if (cfqd->busy_queues)
kblockd_schedule_work(&cfqd->unplug_work);
}
@@ -736,13 +734,63 @@ __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
cfqq->slice_left = 0;
cfq_clear_cfqq_must_alloc_slice(cfqq);
cfq_clear_cfqq_fifo_expire(cfqq);
- cfq_clear_cfqq_expired(cfqq);
}
cfqd->active_queue = cfqq;
}
/*
+ * current cfqq expired its slice (or was too idle), select new one
+ */
+static void
+__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ int preempted)
+{
+ unsigned long now = jiffies;
+
+ if (cfq_cfqq_wait_request(cfqq))
+ del_timer(&cfqd->idle_slice_timer);
+
+ if (!preempted && !cfq_cfqq_dispatched(cfqq)) {
+ cfqq->service_last = now;
+ cfq_schedule_dispatch(cfqd);
+ }
+
+ cfq_clear_cfqq_must_dispatch(cfqq);
+ cfq_clear_cfqq_wait_request(cfqq);
+
+ /*
+ * store what was left of this slice, if the queue idled out
+ * or was preempted
+ */
+ if (time_after(cfqq->slice_end, now))
+ cfqq->slice_left = cfqq->slice_end - now;
+ else
+ cfqq->slice_left = 0;
+
+ if (cfq_cfqq_on_rr(cfqq))
+ cfq_resort_rr_list(cfqq, preempted);
+
+ if (cfqq == cfqd->active_queue)
+ cfqd->active_queue = NULL;
+
+ if (cfqd->active_cic) {
+ put_io_context(cfqd->active_cic->ioc);
+ cfqd->active_cic = NULL;
+ }
+
+ cfqd->dispatch_slice = 0;
+}
+
+static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
+{
+ struct cfq_queue *cfqq = cfqd->active_queue;
+
+ if (cfqq)
+ __cfq_slice_expired(cfqd, cfqq, preempted);
+}
+
+/*
* 0
* 0,1
* 0,1,2
@@ -801,16 +849,7 @@ static int cfq_get_next_prio_level(struct cfq_data *cfqd)
static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
{
- struct cfq_queue *cfqq;
-
- /*
- * if current queue is expired but not done with its requests yet,
- * wait for that to happen
- */
- if ((cfqq = cfqd->active_queue) != NULL) {
- if (cfq_cfqq_expired(cfqq) && cfq_cfqq_dispatched(cfqq))
- return NULL;
- }
+ struct cfq_queue *cfqq = NULL;
/*
* if current list is non-empty, grab first entry. if it is empty,
@@ -837,66 +876,11 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd)
return cfqq;
}
-/*
- * current cfqq expired its slice (or was too idle), select new one
- */
-static void
-__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- int preempted)
-{
- unsigned long now = jiffies;
-
- if (cfq_cfqq_wait_request(cfqq))
- del_timer(&cfqd->idle_slice_timer);
-
- if (!preempted && !cfq_cfqq_dispatched(cfqq))
- cfqq->service_last = now;
-
- cfq_clear_cfqq_must_dispatch(cfqq);
- cfq_clear_cfqq_wait_request(cfqq);
-
- /*
- * store what was left of this slice, if the queue idled out
- * or was preempted
- */
- if (time_after(cfqq->slice_end, now))
- cfqq->slice_left = cfqq->slice_end - now;
- else
- cfqq->slice_left = 0;
-
- if (cfq_cfqq_on_rr(cfqq))
- cfq_resort_rr_list(cfqq, preempted);
-
- if (cfqq == cfqd->active_queue)
- cfqd->active_queue = NULL;
-
- if (cfqd->active_cic) {
- put_io_context(cfqd->active_cic->ioc);
- cfqd->active_cic = NULL;
- }
-
- cfqd->dispatch_slice = 0;
-}
-
-static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
-{
- struct cfq_queue *cfqq = cfqd->active_queue;
-
- if (cfqq) {
- /*
- * use deferred expiry, if there are requests in progress as
- * not to disturb the slice of the next queue
- */
- if (cfq_cfqq_dispatched(cfqq))
- cfq_mark_cfqq_expired(cfqq);
- else
- __cfq_slice_expired(cfqd, cfqq, preempted);
- }
-}
-
static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ unsigned long sl;
+
WARN_ON(!RB_EMPTY(&cfqq->sort_list));
WARN_ON(cfqq != cfqd->active_queue);
@@ -916,13 +900,8 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
cfq_mark_cfqq_must_dispatch(cfqq);
cfq_mark_cfqq_wait_request(cfqq);
- if (!timer_pending(&cfqd->idle_slice_timer)) {
- unsigned long slice_left = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
-
- cfqd->idle_slice_timer.expires = jiffies + slice_left;
- add_timer(&cfqd->idle_slice_timer);
- }
-
+ sl = min(cfqq->slice_end - 1, (unsigned long) cfqd->cfq_slice_idle);
+ mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
return 1;
}
@@ -1006,9 +985,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
if (!cfqq)
goto new_queue;
- if (cfq_cfqq_expired(cfqq))
- goto new_queue;
-
/*
* slice has expired
*/
@@ -1181,10 +1157,8 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
BUG_ON(cfq_cfqq_on_rr(cfqq));
- if (unlikely(cfqd->active_queue == cfqq)) {
+ if (unlikely(cfqd->active_queue == cfqq))
__cfq_slice_expired(cfqd, cfqq, 0);
- cfq_schedule_dispatch(cfqd);
- }
cfq_put_cfqd(cfqq->cfqd);
@@ -1245,10 +1219,8 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
spin_lock(q->queue_lock);
- if (unlikely(cic->cfqq == cfqd->active_queue)) {
+ if (unlikely(cic->cfqq == cfqd->active_queue))
__cfq_slice_expired(cfqd, cic->cfqq, 0);
- cfq_schedule_dispatch(cfqd);
- }
cfq_put_queue(cic->cfqq);
cic->cfqq = NULL;
@@ -1715,10 +1687,7 @@ static void cfq_completed_request(request_queue_t *q, struct request *rq)
cfqq->service_last = now;
cfq_resort_rr_list(cfqq, 0);
}
- if (cfq_cfqq_expired(cfqq)) {
- __cfq_slice_expired(cfqd, cfqq, 0);
- cfq_schedule_dispatch(cfqd);
- }
+ cfq_schedule_dispatch(cfqd);
}
if (cfq_crq_is_sync(crq))
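
A minimal userspace model of the slice-left bookkeeping that the relocated __cfq_slice_expired() performs above; "jiffies", time_after() and the queue struct below are simplified stand-ins, not the kernel definitions.

#include <stdio.h>

#define time_after(a, b)  ((long)((b) - (a)) < 0)

struct cfqq_model {
	unsigned long slice_end;	/* jiffy at which the slice ends */
	unsigned long slice_left;	/* remainder carried to the next slice */
};

static void slice_expired(struct cfqq_model *q, unsigned long now)
{
	/* store what was left of this slice, if the queue idled out */
	if (time_after(q->slice_end, now))
		q->slice_left = q->slice_end - now;
	else
		q->slice_left = 0;
}

int main(void)
{
	struct cfqq_model q = { .slice_end = 1100, .slice_left = 0 };

	slice_expired(&q, 1040);	/* expired early: 60 jiffies remain */
	printf("slice_left = %lu\n", q.slice_left);

	slice_expired(&q, 1200);	/* ran past slice_end: nothing remains */
	printf("slice_left = %lu\n", q.slice_left);
	return 0;
}
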
diff --git a/block/elevator.c b/block/elevator.c
index c9f424d5399..24b702d649a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -139,35 +139,16 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
static char chosen_elevator[16];
-static void elevator_setup_default(void)
+static int __init elevator_setup(char *str)
{
- struct elevator_type *e;
-
- /*
- * If default has not been set, use the compiled-in selection.
- */
- if (!chosen_elevator[0])
- strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
-
/*
* Be backwards-compatible with previous kernels, so users
* won't get the wrong elevator.
*/
- if (!strcmp(chosen_elevator, "as"))
+ if (!strcmp(str, "as"))
strcpy(chosen_elevator, "anticipatory");
-
- /*
- * If the given scheduler is not available, fall back to the default
- */
- if ((e = elevator_find(chosen_elevator)))
- elevator_put(e);
else
- strcpy(chosen_elevator, CONFIG_DEFAULT_IOSCHED);
-}
-
-static int __init elevator_setup(char *str)
-{
- strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
+ strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
return 0;
}
@@ -184,14 +165,16 @@ int elevator_init(request_queue_t *q, char *name)
q->end_sector = 0;
q->boundary_rq = NULL;
- elevator_setup_default();
+ if (name && !(e = elevator_get(name)))
+ return -EINVAL;
- if (!name)
- name = chosen_elevator;
+ if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))
+ printk("I/O scheduler %s not found\n", chosen_elevator);
- e = elevator_get(name);
- if (!e)
- return -EINVAL;
+ if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {
+ printk("Default I/O scheduler not found, using no-op\n");
+ e = elevator_get("noop");
+ }
eq = kmalloc(sizeof(struct elevator_queue), GFP_KERNEL);
if (!eq) {
@@ -310,7 +293,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
rq->flags &= ~REQ_STARTED;
- __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
+ elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
}
static void elv_drain_elevator(request_queue_t *q)
@@ -327,40 +310,11 @@ static void elv_drain_elevator(request_queue_t *q)
}
}
-void __elv_add_request(request_queue_t *q, struct request *rq, int where,
- int plug)
+void elv_insert(request_queue_t *q, struct request *rq, int where)
{
struct list_head *pos;
unsigned ordseq;
- if (q->ordcolor)
- rq->flags |= REQ_ORDERED_COLOR;
-
- if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
- /*
- * toggle ordered color
- */
- q->ordcolor ^= 1;
-
- /*
- * barriers implicitly indicate back insertion
- */
- if (where == ELEVATOR_INSERT_SORT)
- where = ELEVATOR_INSERT_BACK;
-
- /*
- * this request is scheduling boundary, update end_sector
- */
- if (blk_fs_request(rq)) {
- q->end_sector = rq_end_sector(rq);
- q->boundary_rq = rq;
- }
- } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
- where = ELEVATOR_INSERT_BACK;
-
- if (plug)
- blk_plug_device(q);
-
rq->q = q;
switch (where) {
@@ -441,6 +395,42 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
}
}
+void __elv_add_request(request_queue_t *q, struct request *rq, int where,
+ int plug)
+{
+ if (q->ordcolor)
+ rq->flags |= REQ_ORDERED_COLOR;
+
+ if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+ /*
+ * toggle ordered color
+ */
+ if (blk_barrier_rq(rq))
+ q->ordcolor ^= 1;
+
+ /*
+ * barriers implicitly indicate back insertion
+ */
+ if (where == ELEVATOR_INSERT_SORT)
+ where = ELEVATOR_INSERT_BACK;
+
+ /*
+ * this request is scheduling boundary, update
+ * end_sector
+ */
+ if (blk_fs_request(rq)) {
+ q->end_sector = rq_end_sector(rq);
+ q->boundary_rq = rq;
+ }
+ } else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
+ where = ELEVATOR_INSERT_BACK;
+
+ if (plug)
+ blk_plug_device(q);
+
+ elv_insert(q, rq, where);
+}
+
void elv_add_request(request_queue_t *q, struct request *rq, int where,
int plug)
{
@@ -669,8 +659,10 @@ int elv_register(struct elevator_type *e)
spin_unlock_irq(&elv_list_lock);
printk(KERN_INFO "io scheduler %s registered", e->elevator_name);
- if (!strcmp(e->elevator_name, chosen_elevator))
- printk(" (default)");
+ if (!strcmp(e->elevator_name, chosen_elevator) ||
+ (!*chosen_elevator &&
+ !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
+ printk(" (default)");
printk("\n");
return 0;
}
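
A sketch of the lookup fallback the reworked elevator_init() applies: an explicitly requested name, then the boot-time "elevator=" choice, then the compiled-in default, then "noop". elevator_get() is mocked against a fixed table here; only the ordering of the fallbacks is modeled.

#include <stdio.h>
#include <string.h>

static const char *available[] = { "noop", "deadline", "cfq" };

static const char *elevator_get(const char *name)
{
	unsigned int i;

	for (i = 0; i < sizeof(available) / sizeof(available[0]); i++)
		if (!strcmp(name, available[i]))
			return available[i];
	return NULL;
}

static const char *pick_elevator(const char *name, const char *chosen,
				 const char *config_default)
{
	const char *e = NULL;

	if (name && !(e = elevator_get(name)))
		return NULL;			/* explicit request must exist */

	if (!e && *chosen && !(e = elevator_get(chosen)))
		printf("I/O scheduler %s not found\n", chosen);

	if (!e && !(e = elevator_get(config_default))) {
		printf("Default I/O scheduler not found, using no-op\n");
		e = elevator_get("noop");
	}
	return e;
}

int main(void)
{
	/* boot-time choice unavailable: falls back to the compiled-in default */
	printf("-> %s\n", pick_elevator(NULL, "anticipatory", "cfq"));
	/* explicit request wins when it exists */
	printf("-> %s\n", pick_elevator("deadline", "", "cfq"));
	return 0;
}
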
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 8e27d0ab0d7..0ef2971a9e8 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -304,6 +304,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
* blk_queue_ordered - does this queue support ordered writes
* @q: the request queue
* @ordered: one of QUEUE_ORDERED_*
+ * @prepare_flush_fn: rq setup helper for cache flush ordered writes
*
* Description:
* For journalled file systems, doing ordered writes on a commit
@@ -332,6 +333,7 @@ int blk_queue_ordered(request_queue_t *q, unsigned ordered,
return -EINVAL;
}
+ q->ordered = ordered;
q->next_ordered = ordered;
q->prepare_flush_fn = prepare_flush_fn;
@@ -452,7 +454,7 @@ static void queue_flush(request_queue_t *q, unsigned which)
rq->end_io = end_io;
q->prepare_flush_fn(q, rq);
- __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+ elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}
static inline struct request *start_ordered(request_queue_t *q,
@@ -488,7 +490,7 @@ static inline struct request *start_ordered(request_queue_t *q,
else
q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
- __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+ elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
queue_flush(q, QUEUE_ORDERED_PREFLUSH);
@@ -506,7 +508,7 @@ static inline struct request *start_ordered(request_queue_t *q,
int blk_do_ordered(request_queue_t *q, struct request **rqp)
{
- struct request *rq = *rqp, *allowed_rq;
+ struct request *rq = *rqp;
int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
if (!q->ordseq) {
@@ -530,32 +532,26 @@ int blk_do_ordered(request_queue_t *q, struct request **rqp)
}
}
+ /*
+ * Ordered sequence in progress
+ */
+
+ /* Special requests are not subject to ordering rules. */
+ if (!blk_fs_request(rq) &&
+ rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
+ return 1;
+
if (q->ordered & QUEUE_ORDERED_TAG) {
+ /* Ordered by tag. Blocking the next barrier is enough. */
if (is_barrier && rq != &q->bar_rq)
*rqp = NULL;
- return 1;
- }
-
- switch (blk_ordered_cur_seq(q)) {
- case QUEUE_ORDSEQ_PREFLUSH:
- allowed_rq = &q->pre_flush_rq;
- break;
- case QUEUE_ORDSEQ_BAR:
- allowed_rq = &q->bar_rq;
- break;
- case QUEUE_ORDSEQ_POSTFLUSH:
- allowed_rq = &q->post_flush_rq;
- break;
- default:
- allowed_rq = NULL;
- break;
+ } else {
+ /* Ordered by draining. Wait for turn. */
+ WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
+ if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
+ *rqp = NULL;
}
- if (rq != allowed_rq &&
- (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
- rq == &q->post_flush_rq))
- *rqp = NULL;
-
return 1;
}
@@ -629,26 +625,31 @@ static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
* Different hardware can have different requirements as to what pages
* it can do I/O directly to. A low level driver can call
* blk_queue_bounce_limit to have lower memory pages allocated as bounce
- * buffers for doing I/O to pages residing above @page. By default
- * the block layer sets this to the highest numbered "low" memory page.
+ * buffers for doing I/O to pages residing above @page.
**/
void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
{
unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
-
- /*
- * set appropriate bounce gfp mask -- unfortunately we don't have a
- * full 4GB zone, so we have to resort to low memory for any bounces.
- * ISA has its own < 16MB zone.
- */
- if (bounce_pfn < blk_max_low_pfn) {
- BUG_ON(dma_addr < BLK_BOUNCE_ISA);
+ int dma = 0;
+
+ q->bounce_gfp = GFP_NOIO;
+#if BITS_PER_LONG == 64
+ /* Assume anything <= 4GB can be handled by IOMMU.
+ Actually some IOMMUs can handle everything, but I don't
+ know of a way to test this here. */
+ if (bounce_pfn < (0xffffffff>>PAGE_SHIFT))
+ dma = 1;
+ q->bounce_pfn = max_low_pfn;
+#else
+ if (bounce_pfn < blk_max_low_pfn)
+ dma = 1;
+ q->bounce_pfn = bounce_pfn;
+#endif
+ if (dma) {
init_emergency_isa_pool();
q->bounce_gfp = GFP_NOIO | GFP_DMA;
- } else
- q->bounce_gfp = GFP_NOIO;
-
- q->bounce_pfn = bounce_pfn;
+ q->bounce_pfn = bounce_pfn;
+ }
}
EXPORT_SYMBOL(blk_queue_bounce_limit);
@@ -662,7 +663,7 @@ EXPORT_SYMBOL(blk_queue_bounce_limit);
* Enables a low level driver to set an upper limit on the size of
* received requests.
**/
-void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
+void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors)
{
if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
@@ -2577,6 +2578,8 @@ void disk_round_stats(struct gendisk *disk)
disk->stamp = now;
}
+EXPORT_SYMBOL_GPL(disk_round_stats);
+
/*
* queue lock must be held
*/
@@ -2632,6 +2635,7 @@ EXPORT_SYMBOL(blk_put_request);
/**
* blk_end_sync_rq - executes a completion event on a request
* @rq: request to complete
+ * @error: end io status of the request
*/
void blk_end_sync_rq(struct request *rq, int error)
{
@@ -3153,7 +3157,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
if (blk_fs_request(req) && req->rq_disk) {
const int rw = rq_data_dir(req);
- __disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
+ disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
}
total_bytes = bio_nbytes = 0;
@@ -3448,7 +3452,7 @@ int __init blk_dev_init(void)
iocontext_cachep = kmem_cache_create("blkdev_ioc",
sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
- for (i = 0; i < NR_CPUS; i++)
+ for_each_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
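
A userspace sketch of the decision the updated blk_queue_bounce_limit() makes on 64-bit builds: a limit at or above 4GB is assumed reachable via the IOMMU, while anything lower falls back to the ISA/GFP_DMA bounce pool; 32-bit builds compare against the highest low-memory pfn instead. PAGE_SHIFT and the low-pfn value below are illustrative, not the kernel's.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define MAX_LOW_PFN	(0x38000000ULL >> PAGE_SHIFT)	/* ~896MB, example */

static void bounce_limit(uint64_t dma_addr, int bits_per_long)
{
	uint64_t bounce_pfn = dma_addr >> PAGE_SHIFT;
	int dma;

	if (bits_per_long == 64)
		/* limit strictly below 4GB: needs the ISA/GFP_DMA bounce pool */
		dma = bounce_pfn < (0xffffffffULL >> PAGE_SHIFT);
	else
		dma = bounce_pfn < MAX_LOW_PFN;

	printf("limit %#llx: %s\n", (unsigned long long)dma_addr,
	       dma ? "ISA bounce pool + GFP_DMA" : "GFP_NOIO only");
}

int main(void)
{
	bounce_limit(0x00ffffffULL, 64);	/* 16MB ISA-style limit */
	bounce_limit(0xffffffffffULL, 64);	/* high limit, no bouncing */
	return 0;
}
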
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index cc72210687e..24f7af9d0ab 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -310,6 +310,8 @@ static int sg_io(struct file *file, request_queue_t *q,
if (!rq->timeout)
rq->timeout = BLK_DEFAULT_TIMEOUT;
+ rq->retries = 0;
+
start_time = jiffies;
/* ignore return value. All information is passed back to caller
@@ -427,6 +429,7 @@ static int sg_scsi_ioctl(struct file *file, request_queue_t *q,
rq->data = buffer;
rq->data_len = bytes;
rq->flags |= REQ_BLOCK_PC;
+ rq->retries = 0;
blk_execute_rq(q, bd_disk, rq, 0);
err = rq->errors & 0xff; /* only 8 bit SCSI status */