Commit 80af89ca authored by Andrew Morton, committed by Linus Torvalds

[PATCH] block batching fairness

From: Nick Piggin <piggin@cyberone.com.au>

This patch fixes the request batching fairness/starvation issue.  It's not
clear what is going on with 2.4, but it seems there is a problem around this
area.

Anyway, previously:

	* request queue fills up
	* process 1 calls get_request, sleeps
	* a couple of requests are freed
	* process 2 calls get_request, proceeds
	* a couple of requests are freed
	* process 2 calls get_request...

Now, as unlikely as it seems, it could be a problem.  It is a fairness problem
either way: process 2 can keep skipping ahead of process 1.

With the patch:

	* request queue fills up
	* any process calling get_request will sleep
	* once the queue gets below the batch watermark, processes
	  start being woken, and may allocate.


This patch includes Chris Mason's fix to only clear queue_full when all tasks
have been woken.  Without that fix, I think starvation and unfairness could
still occur.

With this change to the blk-fair-batches patch, Chris is showing much improved
numbers for 2.4 - a 170ms max wait versus 2700ms without blk-fair-batches for a
dbench 90 run.  He didn't indicate how much difference his fix alone made, but
I think it is an important one.
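
The policy described above can be illustrated with a small user-space sketch.
This is not the kernel code: it models one request direction only, stands a
plain counter in for the waitqueue, and the names (struct pool,
batch_watermark, and so on) are invented for the illustration; the real logic
lives in get_request(), get_request_wait() and __blk_put_request() in the diff
below.

/*
 * Minimal sketch of the batching policy, for illustration only.
 * One direction, waiters modelled as a plain counter.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_REQUESTS 128

struct pool {
	int count;	/* requests currently allocated */
	int sleepers;	/* tasks blocked in get_request_wait() */
	bool full;	/* models QUEUE_FLAG_READFULL/WRITEFULL */
};

/* mirrors batch_requests(): the watermark the pool must drain down to */
static int batch_watermark(void)
{
	int eighth = NR_REQUESTS / 8;

	return NR_REQUESTS - (eighth < 8 ? eighth : 8) - 1;
}

/* models get_request(); 'force' is set by a task that has already slept */
static bool get_request(struct pool *p, bool force)
{
	if (p->count == NR_REQUESTS)
		p->full = true;
	if (p->full && !force)
		return false;	/* the caller would go to sleep */
	p->count++;
	return true;
}

/* models the request-freeing path in __blk_put_request() */
static void put_request(struct pool *p)
{
	p->count--;
	if (p->count <= batch_watermark()) {
		if (p->sleepers > 0) {
			/* wake_up(): the woken task retries with force set */
			p->sleepers--;
			get_request(p, true);
		} else {
			/* Chris Mason's fix: clear 'full' only once
			 * nobody is left waiting */
			p->full = false;
		}
	}
}

int main(void)
{
	struct pool p = { .count = NR_REQUESTS, .sleepers = 2, .full = true };
	int drained;

	/* a latecomer cannot jump the queue while the pool is marked full */
	printf("pool full, latecomer allocates: %d\n", get_request(&p, false));

	/* drain; sleepers are served first, then 'full' is cleared */
	while (p.sleepers > 0 || p.full)
		put_request(&p);

	drained = p.count;
	printf("drained to %d, latecomer allocates: %d\n",
	       drained, get_request(&p, false));
	return 0;
}

The point to notice is the hysteresis: the full flag is latched the moment the
last request is taken, and it is only dropped again once the pool has drained
to the batch watermark and every sleeper has been handed a request, so a
latecomer cannot overtake a task that is already waiting.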
parent f67198fb
@@ -53,7 +53,7 @@ unsigned long blk_max_low_pfn, blk_max_pfn;
 static inline int batch_requests(struct request_queue *q)
 {
-	return q->nr_requests - min(q->nr_requests / 8, 8UL);
+	return q->nr_requests - min(q->nr_requests / 8, 8UL) - 1;
 }
 /*
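
For a sense of the numbers (assuming the common default of 128 requests per
queue, which is not shown in this hunk): batch_requests() now returns
128 - min(128/8, 8) - 1 = 128 - 8 - 1 = 119, so a full queue has to give back
nine requests before waiters start to be batched back in.  The extra "- 1"
pairs with the switch from "count < batch_requests()" to
"count <= batch_requests()" in the callers below, so the effective wakeup
threshold is unchanged.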
@@ -1309,13 +1309,16 @@ static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask)
 /*
  * Get a free request, queue_lock must not be held
  */
-static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
+static struct request *
+get_request(request_queue_t *q, int rw, int gfp_mask, int force)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	spin_lock_irq(q->queue_lock);
-	if (rl->count[rw] >= q->nr_requests && !elv_may_queue(q, rw)) {
+	if (rl->count[rw] == q->nr_requests)
+		blk_set_queue_full(q, rw);
+	if (blk_queue_full(q, rw) && !force && !elv_may_queue(q, rw)) {
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
@@ -1330,6 +1333,14 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		rl->count[rw]--;
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
+		if (rl->count[rw] <= batch_requests(q)) {
+			if (waitqueue_active(&rl->wait[rw]))
+				wake_up(&rl->wait[rw]);
+			else
+				blk_clear_queue_full(q, rw);
+		}
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
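
Two things change in get_request() itself: the queue-full flag is latched as
soon as the last request is taken (count == nr_requests), and the new 'force'
argument lets a caller bypass the full check.  The hunk above appears to be
the path where blk_alloc_request() failed after the slot had been accounted;
since the count is dropped again, it is treated just like a completed
request - wake a sleeper if there is one, otherwise clear the full flag.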
@@ -1366,26 +1377,22 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 {
 	DEFINE_WAIT(wait);
 	struct request *rq;
+	int waited = 0;
 	generic_unplug_device(q);
 	do {
-		rq = get_request(q, rw, GFP_NOIO);
+		struct request_list *rl = &q->rq;
-		if (!rq) {
-			struct request_list *rl = &q->rq;
-			prepare_to_wait_exclusive(&rl->wait[rw], &wait,
-					TASK_UNINTERRUPTIBLE);
+		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
+				TASK_UNINTERRUPTIBLE);
-			/*
-			 * If _all_ the requests were suddenly returned then
-			 * no wakeup will be delivered. So now we're on the
-			 * waitqueue, go check for that.
-			 */
-			rq = get_request(q, rw, GFP_NOIO);
-			if (!rq)
-				io_schedule();
-			finish_wait(&rl->wait[rw], &wait);
+		rq = get_request(q, rw, GFP_NOIO, waited);
+		if (!rq) {
+			io_schedule();
+			waited = 1;
+		}
+		finish_wait(&rl->wait[rw], &wait);
 	} while (!rq);
 	return rq;
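
The rewrite of get_request_wait() is where the 'force' argument earns its
keep: the task now puts itself on the waitqueue before trying to allocate, and
passes 'waited' as 'force'.  A task that has been through io_schedule() at
least once therefore ignores the full flag on its next attempt - it was woken
precisely because a request was freed for it - while a first-time caller still
respects the flag, so a newcomer cannot steal the request out from under the
sleeper it was meant for.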
@@ -1397,10 +1404,10 @@ struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
 	BUG_ON(rw != READ && rw != WRITE);
-	rq = get_request(q, rw, gfp_mask);
-	if (!rq && (gfp_mask & __GFP_WAIT))
+	if (gfp_mask & __GFP_WAIT)
 		rq = get_request_wait(q, rw);
+	else
+		rq = get_request(q, rw, gfp_mask, 0);
 	return rq;
 }
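
blk_get_request() is adjusted to match: a __GFP_WAIT caller no longer takes a
fast-path attempt first, it goes straight into get_request_wait() and queues
up behind any tasks already sleeping, which is the fairness point of the whole
patch.  Non-blocking callers use the ordinary get_request() with force = 0.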
@@ -1551,9 +1558,13 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 		rl->count[rw]--;
 		if (rl->count[rw] < queue_congestion_off_threshold(q))
 			clear_queue_congested(q, rw);
-		if (rl->count[rw] < batch_requests(q) &&
-				waitqueue_active(&rl->wait[rw]))
-			wake_up(&rl->wait[rw]);
+		if (rl->count[rw] <= batch_requests(q)) {
+			if (waitqueue_active(&rl->wait[rw]))
+				wake_up(&rl->wait[rw]);
+			else
+				blk_clear_queue_full(q, rw);
+		}
 	}
 }
@@ -1796,7 +1807,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 		freereq = NULL;
 	} else {
 		spin_unlock_irq(q->queue_lock);
-		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
+		if ((freereq = get_request(q, rw, GFP_ATOMIC, 0)) == NULL) {
 			/*
 			 * READA bit set
 			 */
@@ -1904,8 +1915,7 @@ static inline void blk_partition_remap(struct bio *bio)
  * bio happens to be merged with someone else, and may change bi_dev and
  * bi_sector for remaps as it sees fit. So the values of these fields
  * should NOT be depended on after the call to generic_make_request.
- *
- * */
+ */
 void generic_make_request(struct bio *bio)
 {
 	request_queue_t *q;
@@ -2415,6 +2425,19 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 	else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
 		clear_queue_congested(q, WRITE);
+	if (rl->count[READ] >= q->nr_requests) {
+		blk_set_queue_full(q, READ);
+	} else if (rl->count[READ] <= batch_requests(q)) {
+		blk_clear_queue_full(q, READ);
+		wake_up_all(&rl->wait[READ]);
+	}
+	if (rl->count[WRITE] >= q->nr_requests) {
+		blk_set_queue_full(q, WRITE);
+	} else if (rl->count[WRITE] <= batch_requests(q)) {
+		blk_clear_queue_full(q, WRITE);
+		wake_up_all(&rl->wait[WRITE]);
+	}
 	return ret;
 }
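
The sysfs store handler for nr_requests needs the same treatment because
changing the queue depth moves both thresholds: each direction's full flag is
recomputed against the new limit, and wake_up_all() is used rather than
wake_up(), presumably because after raising nr_requests the count may be far
enough below the new watermark that every sleeper can be served at once.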
@@ -307,6 +307,8 @@ struct request_queue
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
+#define QUEUE_FLAG_READFULL	3	/* write queue has been filled */
+#define QUEUE_FLAG_WRITEFULL	4	/* read queue has been filled */
 #define blk_queue_plugged(q)	!list_empty(&(q)->plug_list)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
@@ -322,6 +324,30 @@ struct request_queue
 #define rq_data_dir(rq)	((rq)->flags & 1)
+static inline int blk_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		return test_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	return test_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
+static inline void blk_set_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		set_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	else
+		set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
+static inline void blk_clear_queue_full(struct request_queue *q, int rw)
+{
+	if (rw == READ)
+		clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags);
+	else
+		clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags);
+}
+
 /*
  * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may
  * it already be started by driver.
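
The new helpers are straightforward per-direction wrappers around atomic bit
operations on queue_flags (note, in passing, that the comments on the two flag
definitions above are transposed: READFULL tracks the read queue and WRITEFULL
the write queue, as the helpers themselves show).  A hedged user-space
rendition, with plain C bit operations standing in for the kernel's
test_bit()/set_bit()/clear_bit() and invented names for everything else:

#include <stdbool.h>
#include <stdio.h>

#define READ			0
#define WRITE			1

/* stand-ins for the two new queue flags */
#define QUEUE_FLAG_READFULL	3
#define QUEUE_FLAG_WRITEFULL	4

struct request_queue {
	unsigned long queue_flags;
};

/* same shape as the kernel helpers, minus the atomicity of set_bit() etc. */
static bool blk_queue_full(struct request_queue *q, int rw)
{
	if (rw == READ)
		return q->queue_flags & (1UL << QUEUE_FLAG_READFULL);
	return q->queue_flags & (1UL << QUEUE_FLAG_WRITEFULL);
}

static void blk_set_queue_full(struct request_queue *q, int rw)
{
	q->queue_flags |= 1UL << (rw == READ ?
			QUEUE_FLAG_READFULL : QUEUE_FLAG_WRITEFULL);
}

static void blk_clear_queue_full(struct request_queue *q, int rw)
{
	q->queue_flags &= ~(1UL << (rw == READ ?
			QUEUE_FLAG_READFULL : QUEUE_FLAG_WRITEFULL));
}

int main(void)
{
	struct request_queue q = { 0 };

	blk_set_queue_full(&q, READ);
	printf("read full %d, write full %d\n",
	       blk_queue_full(&q, READ), blk_queue_full(&q, WRITE));
	blk_clear_queue_full(&q, READ);
	printf("read full %d after clear\n", blk_queue_full(&q, READ));
	return 0;
}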