From fd0928df98b9578be8a786ac0cb78a47a5e17a20 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 24 Jan 2008 08:52:45 +0100
Subject: ioprio: move io priority from task_struct to io_context

This is where it belongs, and it then doesn't take up space for a
process that doesn't do IO.

Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h    | 81 ++++-------------------------------------------
 include/linux/init_task.h |  1 -
 include/linux/iocontext.h | 79 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/ioprio.h    | 13 ++++----
 include/linux/sched.h     |  1 -
 5 files changed, 93 insertions(+), 82 deletions(-)
 create mode 100644 include/linux/iocontext.h
(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49b7a4c31a6..510a18ba1ec 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,83 +34,10 @@ struct sg_io_hdr;
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
 
-/*
- * This is the per-process anticipatory I/O scheduler state.
- */
-struct as_io_context {
-	spinlock_t lock;
-
-	void (*dtor)(struct as_io_context *aic); /* destructor */
-	void (*exit)(struct as_io_context *aic); /* called on task exit */
-
-	unsigned long state;
-	atomic_t nr_queued; /* queued reads & sync writes */
-	atomic_t nr_dispatched; /* number of requests gone to the drivers */
-
-	/* IO History tracking */
-	/* Thinktime */
-	unsigned long last_end_request;
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-	/* Layout pattern */
-	unsigned int seek_samples;
-	sector_t last_request_pos;
-	u64 seek_total;
-	sector_t seek_mean;
-};
-
-struct cfq_queue;
-struct cfq_io_context {
-	struct rb_node rb_node;
-	void *key;
-
-	struct cfq_queue *cfqq[2];
-
-	struct io_context *ioc;
-
-	unsigned long last_end_request;
-	sector_t last_request_pos;
-
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-
-	unsigned int seek_samples;
-	u64 seek_total;
-	sector_t seek_mean;
-
-	struct list_head queue_list;
-
-	void (*dtor)(struct io_context *); /* destructor */
-	void (*exit)(struct io_context *); /* called on task exit */
-};
-
-/*
- * This is the per-process I/O subsystem state. It is refcounted and
- * kmalloc'ed. Currently all fields are modified in process io context
- * (apart from the atomic refcount), so require no locking.
- */
-struct io_context {
-	atomic_t refcount;
-	struct task_struct *task;
-
-	unsigned int ioprio_changed;
-
-	/*
-	 * For request batching
-	 */
-	unsigned long last_waited; /* Time last woken after wait for request */
-	int nr_batch_requests;	/* Number of requests left in the batch */
-
-	struct as_io_context *aic;
-	struct rb_root cic_root;
-	void *ioc_data;
-};
-
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
 struct io_context *get_io_context(gfp_t gfp_flags, int node);
+struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
@@ -894,6 +821,12 @@ static inline void exit_io_context(void)
 {
 }
 
+static inline int put_io_context(struct io_context *ioc)
+{
+	return 1;
+}
+
+
 #endif /* CONFIG_BLOCK */
 
 #endif
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 796019b22b6..e6b3f708067 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -137,7 +137,6 @@ extern struct group_info init_groups;
 		.time_slice	= HZ,					\
 		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
-	.ioprio		= 0,						\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),		\
 	.ptrace_list	= LIST_HEAD_INIT(tsk.ptrace_list),		\
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
new file mode 100644
index 00000000000..186807ea62e
--- /dev/null
+++ b/include/linux/iocontext.h
@@ -0,0 +1,79 @@
+#ifndef IOCONTEXT_H
+#define IOCONTEXT_H
+
+/*
+ * This is the per-process anticipatory I/O scheduler state.
+ */
+struct as_io_context {
+	spinlock_t lock;
+
+	void (*dtor)(struct as_io_context *aic); /* destructor */
+	void (*exit)(struct as_io_context *aic); /* called on task exit */
+
+	unsigned long state;
+	atomic_t nr_queued; /* queued reads & sync writes */
+	atomic_t nr_dispatched; /* number of requests gone to the drivers */
+
+	/* IO History tracking */
+	/* Thinktime */
+	unsigned long last_end_request;
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+	/* Layout pattern */
+	unsigned int seek_samples;
+	sector_t last_request_pos;
+	u64 seek_total;
+	sector_t seek_mean;
+};
+
+struct cfq_queue;
+struct cfq_io_context {
+	struct rb_node rb_node;
+	void *key;
+
+	struct cfq_queue *cfqq[2];
+
+	struct io_context *ioc;
+
+	unsigned long last_end_request;
+	sector_t last_request_pos;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+
+	unsigned int seek_samples;
+	u64 seek_total;
+	sector_t seek_mean;
+
+	struct list_head queue_list;
+
+	void (*dtor)(struct io_context *); /* destructor */
+	void (*exit)(struct io_context *); /* called on task exit */
+};
+
+/*
+ * This is the per-process I/O subsystem state. It is refcounted and
+ * kmalloc'ed. Currently all fields are modified in process io context
+ * (apart from the atomic refcount), so require no locking.
+ */
+struct io_context {
+	atomic_t refcount;
+	struct task_struct *task;
+
+	unsigned short ioprio;
+	unsigned short ioprio_changed;
+
+	/*
+	 * For request batching
+	 */
+	unsigned long last_waited; /* Time last woken after wait for request */
+	int nr_batch_requests;	/* Number of requests left in the batch */
+
+	struct as_io_context *aic;
+	struct rb_root cic_root;
+	void *ioc_data;
+};
+
+#endif
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index baf29387cab..2a3bb1bb743 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -2,6 +2,7 @@
 #define IOPRIO_H
 
 #include <linux/sched.h>
+#include <linux/iocontext.h>
 
 /*
  * Gives us 8 prio classes with 13-bits of data for each class
@@ -45,18 +46,18 @@ enum {
  * the cpu scheduler nice value to an io priority
 */
 #define IOPRIO_NORM	(4)
-static inline int task_ioprio(struct task_struct *task)
+static inline int task_ioprio(struct io_context *ioc)
 {
-	if (ioprio_valid(task->ioprio))
-		return IOPRIO_PRIO_DATA(task->ioprio);
+	if (ioprio_valid(ioc->ioprio))
+		return IOPRIO_PRIO_DATA(ioc->ioprio);
 
 	return IOPRIO_NORM;
 }
 
-static inline int task_ioprio_class(struct task_struct *task)
+static inline int task_ioprio_class(struct io_context *ioc)
 {
-	if (ioprio_valid(task->ioprio))
-		return IOPRIO_PRIO_CLASS(task->ioprio);
+	if (ioprio_valid(ioc->ioprio))
+		return IOPRIO_PRIO_CLASS(ioc->ioprio);
 
 	return IOPRIO_CLASS_BE;
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index df5b24ee80b..80837e7d527 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -975,7 +975,6 @@ struct task_struct {
 	struct hlist_head preempt_notifiers;
 #endif
 
-	unsigned short ioprio;
 	/*
 	 * fpu_counter contains the number of consecutive context switches
 	 * that the FPU is used. If this is over a threshold, the lazy fpu
--
cgit v1.2.3

From d38ecf935fcb10264a6bc190855d9595165e6eeb Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 24 Jan 2008 08:53:35 +0100
Subject: io context sharing: preliminary support

Detach task state from the ioc; instead, keep track of how many
processes are accessing the ioc.

Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h    |  2 +-
 include/linux/iocontext.h | 22 ++++++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)
(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 510a18ba1ec..2483a05231c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -34,7 +34,7 @@ struct sg_io_hdr;
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
 
-void put_io_context(struct io_context *ioc);
+int put_io_context(struct io_context *ioc);
 void exit_io_context(void);
 struct io_context *get_io_context(gfp_t gfp_flags, int node);
 struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 186807ea62e..cd44d458124 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -54,13 +54,15 @@ struct cfq_io_context {
 };
 
 /*
- * This is the per-process I/O subsystem state. It is refcounted and
- * kmalloc'ed. Currently all fields are modified in process io context
- * (apart from the atomic refcount), so require no locking.
+ * I/O subsystem state of the associated processes. It is refcounted
+ * and kmalloc'ed. These could be shared between processes.
*/ struct io_context { atomic_t refcount; - struct task_struct *task; + atomic_t nr_tasks; + + /* all the fields below are protected by this lock */ + spinlock_t lock; unsigned short ioprio; unsigned short ioprio_changed; @@ -76,4 +78,16 @@ struct io_context { void *ioc_data; }; +static inline struct io_context *ioc_task_link(struct io_context *ioc) +{ + /* + * if ref count is zero, don't allow sharing (ioc is going away, it's + * a race). + */ + if (ioc && atomic_inc_not_zero(&ioc->refcount)) + return ioc; + + return NULL; +} + #endif -- cgit v1.2.3 From 4ac845a2e9a816ed5a7b301f56dcc0a3d0b1ba4d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Jan 2008 08:44:49 +0100 Subject: block: cfq: make the io contect sharing lockless The io context sharing introduced a per-ioc spinlock, that would protect the cfq io context lookup. That is a regression from the original, since we never needed any locking there because the ioc/cic were process private. The cic lookup is changed from an rbtree construct to a radix tree, which we can then use RCU to make the reader side lockless. That is the performance critical path, modifying the radix tree is only done on process creation (when that process first does IO, actually) and on process exit (if that process has done IO). As it so happens, radix trees are also much faster for this type of lookup where the key is a pointer. It's a very sparse tree. Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index cd44d458124..593b222d9dc 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -1,6 +1,8 @@ #ifndef IOCONTEXT_H #define IOCONTEXT_H +#include + /* * This is the per-process anticipatory I/O scheduler state. */ @@ -29,8 +31,8 @@ struct as_io_context { struct cfq_queue; struct cfq_io_context { - struct rb_node rb_node; void *key; + unsigned long dead_key; struct cfq_queue *cfqq[2]; @@ -74,7 +76,7 @@ struct io_context { int nr_batch_requests; /* Number of requests left in the batch */ struct as_io_context *aic; - struct rb_root cic_root; + struct radix_tree_root radix_root; void *ioc_data; }; -- cgit v1.2.3 From fadad878cc0640cc9cd5569998bf54b693f7b38b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Jan 2008 08:54:47 +0100 Subject: kernel: add CLONE_IO to specifically request sharing of IO contexts syslets (or other threads/processes that want io context sharing) can set this to enforce sharing of io context. Signed-off-by: Jens Axboe --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 80837e7d527..2d0546e884e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -27,6 +27,7 @@ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ #define CLONE_NEWPID 0x20000000 /* New pid namespace */ #define CLONE_NEWNET 0x40000000 /* New network namespace */ +#define CLONE_IO 0x80000000 /* Clone io context */ /* * Scheduling policies -- cgit v1.2.3