Commit 35a82d1a, authored by Nick Piggin, committed by Linus Torvalds

[PATCH] optimise loop driver a bit

Looks like locking can be optimised quite a lot.  Increase lock widths
slightly so lo_lock is taken fewer times per request.  Also it was quite
trivial to cover lo_pending with that lock, and remove the atomic
requirement.  This also makes memory ordering explicitly correct, which is
nice (not that I particularly saw any mem ordering bugs).

Test was reading 4 250MB files in parallel on ext2-on-tmpfs filesystem (1K
block size, 4K page size).  System is 2 socket Xeon with HT (4 thread).

intel:/home/npiggin# umount /dev/loop0 ; mount /dev/loop0 /mnt/loop ; /usr/bin/time ./mtloop.sh

Before:
0.24user 5.51system 0:02.84elapsed 202%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.52system 0:02.88elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.57system 0:02.89elapsed 198%CPU (0avgtext+0avgdata 0maxresident)k
0.22user 5.51system 0:02.90elapsed 197%CPU (0avgtext+0avgdata 0maxresident)k
0.19user 5.44system 0:02.91elapsed 193%CPU (0avgtext+0avgdata 0maxresident)k

After:
0.07user 2.34system 0:01.68elapsed 143%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.37system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.39system 0:01.68elapsed 145%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.36system 0:01.68elapsed 144%CPU (0avgtext+0avgdata 0maxresident)k
0.06user 2.42system 0:01.68elapsed 147%CPU (0avgtext+0avgdata 0maxresident)k
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent ab4af03a
...@@ -472,17 +472,11 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) ...@@ -472,17 +472,11 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
*/ */
static void loop_add_bio(struct loop_device *lo, struct bio *bio) static void loop_add_bio(struct loop_device *lo, struct bio *bio)
{ {
unsigned long flags;
spin_lock_irqsave(&lo->lo_lock, flags);
if (lo->lo_biotail) { if (lo->lo_biotail) {
lo->lo_biotail->bi_next = bio; lo->lo_biotail->bi_next = bio;
lo->lo_biotail = bio; lo->lo_biotail = bio;
} else } else
lo->lo_bio = lo->lo_biotail = bio; lo->lo_bio = lo->lo_biotail = bio;
spin_unlock_irqrestore(&lo->lo_lock, flags);
up(&lo->lo_bh_mutex);
} }
/* /*
...@@ -492,14 +486,12 @@ static struct bio *loop_get_bio(struct loop_device *lo) ...@@ -492,14 +486,12 @@ static struct bio *loop_get_bio(struct loop_device *lo)
{ {
struct bio *bio; struct bio *bio;
spin_lock_irq(&lo->lo_lock);
if ((bio = lo->lo_bio)) { if ((bio = lo->lo_bio)) {
if (bio == lo->lo_biotail) if (bio == lo->lo_biotail)
lo->lo_biotail = NULL; lo->lo_biotail = NULL;
lo->lo_bio = bio->bi_next; lo->lo_bio = bio->bi_next;
bio->bi_next = NULL; bio->bi_next = NULL;
} }
spin_unlock_irq(&lo->lo_lock);
return bio; return bio;
} }
...@@ -509,35 +501,28 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) ...@@ -509,35 +501,28 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
struct loop_device *lo = q->queuedata; struct loop_device *lo = q->queuedata;
int rw = bio_rw(old_bio); int rw = bio_rw(old_bio);
if (!lo) if (rw == READA)
goto out; rw = READ;
BUG_ON(!lo || (rw != READ && rw != WRITE));
spin_lock_irq(&lo->lo_lock); spin_lock_irq(&lo->lo_lock);
if (lo->lo_state != Lo_bound) if (lo->lo_state != Lo_bound)
goto inactive; goto out;
atomic_inc(&lo->lo_pending); if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
spin_unlock_irq(&lo->lo_lock); goto out;
lo->lo_pending++;
if (rw == WRITE) {
if (lo->lo_flags & LO_FLAGS_READ_ONLY)
goto err;
} else if (rw == READA) {
rw = READ;
} else if (rw != READ) {
printk(KERN_ERR "loop: unknown command (%x)\n", rw);
goto err;
}
loop_add_bio(lo, old_bio); loop_add_bio(lo, old_bio);
return 0; spin_unlock_irq(&lo->lo_lock);
err:
if (atomic_dec_and_test(&lo->lo_pending))
up(&lo->lo_bh_mutex); up(&lo->lo_bh_mutex);
return 0;
out: out:
if (lo->lo_pending == 0)
up(&lo->lo_bh_mutex);
spin_unlock_irq(&lo->lo_lock);
bio_io_error(old_bio, old_bio->bi_size); bio_io_error(old_bio, old_bio->bi_size);
return 0; return 0;
inactive:
spin_unlock_irq(&lo->lo_lock);
goto out;
} }
/* /*
...@@ -560,13 +545,11 @@ static void do_loop_switch(struct loop_device *, struct switch_request *); ...@@ -560,13 +545,11 @@ static void do_loop_switch(struct loop_device *, struct switch_request *);
static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
{ {
int ret;
if (unlikely(!bio->bi_bdev)) { if (unlikely(!bio->bi_bdev)) {
do_loop_switch(lo, bio->bi_private); do_loop_switch(lo, bio->bi_private);
bio_put(bio); bio_put(bio);
} else { } else {
ret = do_bio_filebacked(lo, bio); int ret = do_bio_filebacked(lo, bio);
bio_endio(bio, bio->bi_size, ret); bio_endio(bio, bio->bi_size, ret);
} }
} }
...@@ -594,7 +577,7 @@ static int loop_thread(void *data) ...@@ -594,7 +577,7 @@ static int loop_thread(void *data)
set_user_nice(current, -20); set_user_nice(current, -20);
lo->lo_state = Lo_bound; lo->lo_state = Lo_bound;
atomic_inc(&lo->lo_pending); lo->lo_pending = 1;
/* /*
* up sem, we are running * up sem, we are running
...@@ -602,26 +585,37 @@ static int loop_thread(void *data) ...@@ -602,26 +585,37 @@ static int loop_thread(void *data)
up(&lo->lo_sem); up(&lo->lo_sem);
for (;;) { for (;;) {
down_interruptible(&lo->lo_bh_mutex); int pending;
/*
* interruptible just to not contribute to load avg
*/
if (down_interruptible(&lo->lo_bh_mutex))
continue;
spin_lock_irq(&lo->lo_lock);
/* /*
* could be upped because of tear-down, not because of * could be upped because of tear-down, not pending work
* pending work
*/ */
if (!atomic_read(&lo->lo_pending)) if (unlikely(!lo->lo_pending)) {
spin_unlock_irq(&lo->lo_lock);
break; break;
}
bio = loop_get_bio(lo); bio = loop_get_bio(lo);
if (!bio) { lo->lo_pending--;
printk("loop: missing bio\n"); pending = lo->lo_pending;
continue; spin_unlock_irq(&lo->lo_lock);
}
BUG_ON(!bio);
loop_handle_bio(lo, bio); loop_handle_bio(lo, bio);
/* /*
* upped both for pending work and tear-down, lo_pending * upped both for pending work and tear-down, lo_pending
* will hit zero then * will hit zero then
*/ */
if (atomic_dec_and_test(&lo->lo_pending)) if (unlikely(!pending))
break; break;
} }
...@@ -900,7 +894,8 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) ...@@ -900,7 +894,8 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
spin_lock_irq(&lo->lo_lock); spin_lock_irq(&lo->lo_lock);
lo->lo_state = Lo_rundown; lo->lo_state = Lo_rundown;
if (atomic_dec_and_test(&lo->lo_pending)) lo->lo_pending--;
if (!lo->lo_pending)
up(&lo->lo_bh_mutex); up(&lo->lo_bh_mutex);
spin_unlock_irq(&lo->lo_lock); spin_unlock_irq(&lo->lo_lock);
......
...@@ -61,7 +61,7 @@ struct loop_device { ...@@ -61,7 +61,7 @@ struct loop_device {
struct semaphore lo_sem; struct semaphore lo_sem;
struct semaphore lo_ctl_mutex; struct semaphore lo_ctl_mutex;
struct semaphore lo_bh_mutex; struct semaphore lo_bh_mutex;
atomic_t lo_pending; int lo_pending;
request_queue_t *lo_queue; request_queue_t *lo_queue;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment