Commit 4ac845a2 authored by Jens Axboe's avatar Jens Axboe

block: cfq: make the io context sharing lockless

The io context sharing introduced a per-ioc spinlock, that would protect
the cfq io context lookup. That is a regression from the original, since
we never needed any locking there because the ioc/cic were process private.

The cic lookup is changed from an rbtree construct to a radix tree, which
we can then use RCU to make the reader side lockless. That is the performance
critical path, modifying the radix tree is only done on process creation
(when that process first does IO, actually) and on process exit (if that
process has done IO).

As it so happens, radix trees are also much faster for this type of
lookup where the key is a pointer. It's a very sparse tree.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent 66dac98e
This diff is collapsed.
...@@ -3853,6 +3853,21 @@ int __init blk_dev_init(void) ...@@ -3853,6 +3853,21 @@ int __init blk_dev_init(void)
return 0; return 0;
} }
/*
 * Invoke the CFQ destructor hook for an io_context that is being torn
 * down.  There is no specific key to look up with, so a gang lookup is
 * used purely to grab the first stored cfq_io_context; the cfq-side exit
 * function walks the whole tree itself, so any member is good enough.
 */
static void cfq_dtor(struct io_context *ioc)
{
	struct cfq_io_context *first;
	void *slot[1];

	/* Any entry will do: ->dtor() iterates the entire radix tree. */
	if (radix_tree_gang_lookup(&ioc->radix_root, slot, 0, 1) > 0) {
		first = slot[0];
		first->dtor(ioc);
	}
}
/* /*
* IO Context helper functions. put_io_context() returns 1 if there are no * IO Context helper functions. put_io_context() returns 1 if there are no
* more users of this io context, 0 otherwise. * more users of this io context, 0 otherwise.
...@@ -3865,18 +3880,11 @@ int put_io_context(struct io_context *ioc) ...@@ -3865,18 +3880,11 @@ int put_io_context(struct io_context *ioc)
BUG_ON(atomic_read(&ioc->refcount) == 0); BUG_ON(atomic_read(&ioc->refcount) == 0);
if (atomic_dec_and_test(&ioc->refcount)) { if (atomic_dec_and_test(&ioc->refcount)) {
struct cfq_io_context *cic;
rcu_read_lock(); rcu_read_lock();
if (ioc->aic && ioc->aic->dtor) if (ioc->aic && ioc->aic->dtor)
ioc->aic->dtor(ioc->aic); ioc->aic->dtor(ioc->aic);
if (ioc->cic_root.rb_node != NULL) {
struct rb_node *n = rb_first(&ioc->cic_root);
cic = rb_entry(n, struct cfq_io_context, rb_node);
cic->dtor(ioc);
}
rcu_read_unlock(); rcu_read_unlock();
cfq_dtor(ioc);
kmem_cache_free(iocontext_cachep, ioc); kmem_cache_free(iocontext_cachep, ioc);
return 1; return 1;
...@@ -3885,11 +3893,26 @@ int put_io_context(struct io_context *ioc) ...@@ -3885,11 +3893,26 @@ int put_io_context(struct io_context *ioc)
} }
EXPORT_SYMBOL(put_io_context); EXPORT_SYMBOL(put_io_context);
static void cfq_exit(struct io_context *ioc)
{
struct cfq_io_context *cic[1];
int r;
rcu_read_lock();
/*
* See comment for cfq_dtor()
*/
r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1);
rcu_read_unlock();
if (r > 0)
cic[0]->exit(ioc);
}
/* Called by the exiting task */ /* Called by the exiting task */
void exit_io_context(void) void exit_io_context(void)
{ {
struct io_context *ioc; struct io_context *ioc;
struct cfq_io_context *cic;
task_lock(current); task_lock(current);
ioc = current->io_context; ioc = current->io_context;
...@@ -3899,11 +3922,7 @@ void exit_io_context(void) ...@@ -3899,11 +3922,7 @@ void exit_io_context(void)
if (atomic_dec_and_test(&ioc->nr_tasks)) { if (atomic_dec_and_test(&ioc->nr_tasks)) {
if (ioc->aic && ioc->aic->exit) if (ioc->aic && ioc->aic->exit)
ioc->aic->exit(ioc->aic); ioc->aic->exit(ioc->aic);
if (ioc->cic_root.rb_node != NULL) { cfq_exit(ioc);
cic = rb_entry(rb_first(&ioc->cic_root),
struct cfq_io_context, rb_node);
cic->exit(ioc);
}
put_io_context(ioc); put_io_context(ioc);
} }
...@@ -3923,7 +3942,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) ...@@ -3923,7 +3942,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
ret->last_waited = jiffies; /* doesn't matter... */ ret->last_waited = jiffies; /* doesn't matter... */
ret->nr_batch_requests = 0; /* because this is 0 */ ret->nr_batch_requests = 0; /* because this is 0 */
ret->aic = NULL; ret->aic = NULL;
ret->cic_root.rb_node = NULL; INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
ret->ioc_data = NULL; ret->ioc_data = NULL;
} }
......
#ifndef IOCONTEXT_H #ifndef IOCONTEXT_H
#define IOCONTEXT_H #define IOCONTEXT_H
#include <linux/radix-tree.h>
/* /*
* This is the per-process anticipatory I/O scheduler state. * This is the per-process anticipatory I/O scheduler state.
*/ */
...@@ -29,8 +31,8 @@ struct as_io_context { ...@@ -29,8 +31,8 @@ struct as_io_context {
struct cfq_queue; struct cfq_queue;
struct cfq_io_context { struct cfq_io_context {
struct rb_node rb_node;
void *key; void *key;
unsigned long dead_key;
struct cfq_queue *cfqq[2]; struct cfq_queue *cfqq[2];
...@@ -74,7 +76,7 @@ struct io_context { ...@@ -74,7 +76,7 @@ struct io_context {
int nr_batch_requests; /* Number of requests left in the batch */ int nr_batch_requests; /* Number of requests left in the batch */
struct as_io_context *aic; struct as_io_context *aic;
struct rb_root cic_root; struct radix_tree_root radix_root;
void *ioc_data; void *ioc_data;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment