Commit af6d7b76 authored by NeilBrown

md/raid1: improve handling of pages allocated for write-behind.

The current handling and freeing of these pages is a bit fragile.
We only keep the list of allocated pages in each bio, so we need to
still have a valid bio when freeing the pages, which is a bit clumsy.

So simply store the allocated page list in the r1_bio so it can easily
be found and freed when we are finished with the r1_bio.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 7ca78d57
...@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error) ...@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
} }
static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv, static void r1_bio_write_done(r1bio_t *r1_bio)
int behind)
{ {
if (atomic_dec_and_test(&r1_bio->remaining)) if (atomic_dec_and_test(&r1_bio->remaining))
{ {
/* it really is the end of this request */ /* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
/* free extra copy of the data pages */ /* free extra copy of the data pages */
int i = vcnt; int i = r1_bio->behind_page_count;
while (i--) while (i--)
safe_put_page(bv[i].bv_page); safe_put_page(r1_bio->behind_pages[i]);
kfree(r1_bio->behind_pages);
r1_bio->behind_pages = NULL;
} }
/* clear the bitmap if all writes complete successfully */ /* clear the bitmap if all writes complete successfully */
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
r1_bio->sectors, r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state), !test_bit(R1BIO_Degraded, &r1_bio->state),
behind); test_bit(R1BIO_BehindIO, &r1_bio->state));
md_write_end(r1_bio->mddev); md_write_end(r1_bio->mddev);
raid_end_bio_io(r1_bio); raid_end_bio_io(r1_bio);
} }
...@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error) ...@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
* Let's see if all mirrored write operations have finished * Let's see if all mirrored write operations have finished
* already. * already.
*/ */
r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind); r1_bio_write_done(r1_bio);
if (to_put) if (to_put)
bio_put(to_put); bio_put(to_put);
...@@ -660,37 +661,36 @@ static void unfreeze_array(conf_t *conf) ...@@ -660,37 +661,36 @@ static void unfreeze_array(conf_t *conf)
/* duplicate the data pages for behind I/O /* duplicate the data pages for behind I/O
* We return a list of bio_vec rather than just page pointers
* as it makes freeing easier
*/ */
static struct bio_vec *alloc_behind_pages(struct bio *bio) static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
{ {
int i; int i;
struct bio_vec *bvec; struct bio_vec *bvec;
struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
GFP_NOIO); GFP_NOIO);
if (unlikely(!pages)) if (unlikely(!pages))
goto do_sync_io; return;
bio_for_each_segment(bvec, bio, i) { bio_for_each_segment(bvec, bio, i) {
pages[i].bv_page = alloc_page(GFP_NOIO); pages[i] = alloc_page(GFP_NOIO);
if (unlikely(!pages[i].bv_page)) if (unlikely(!pages[i]))
goto do_sync_io; goto do_sync_io;
memcpy(kmap(pages[i].bv_page) + bvec->bv_offset, memcpy(kmap(pages[i]) + bvec->bv_offset,
kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
kunmap(pages[i].bv_page); kunmap(pages[i]);
kunmap(bvec->bv_page); kunmap(bvec->bv_page);
} }
r1_bio->behind_pages = pages;
return pages; r1_bio->behind_page_count = bio->bi_vcnt;
set_bit(R1BIO_BehindIO, &r1_bio->state);
return;
do_sync_io: do_sync_io:
if (pages) for (i = 0; i < bio->bi_vcnt; i++)
for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++) if (pages[i])
put_page(pages[i].bv_page); put_page(pages[i]);
kfree(pages); kfree(pages);
PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
return NULL;
} }
static int make_request(mddev_t *mddev, struct bio * bio) static int make_request(mddev_t *mddev, struct bio * bio)
...@@ -702,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -702,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
int i, targets = 0, disks; int i, targets = 0, disks;
struct bitmap *bitmap; struct bitmap *bitmap;
unsigned long flags; unsigned long flags;
struct bio_vec *behind_pages = NULL;
const int rw = bio_data_dir(bio); const int rw = bio_data_dir(bio);
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
...@@ -855,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -855,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
if (bitmap && if (bitmap &&
(atomic_read(&bitmap->behind_writes) (atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) && < mddev->bitmap_info.max_write_behind) &&
!waitqueue_active(&bitmap->behind_wait) && !waitqueue_active(&bitmap->behind_wait))
(behind_pages = alloc_behind_pages(bio)) != NULL) alloc_behind_pages(bio, r1_bio);
set_bit(R1BIO_BehindIO, &r1_bio->state);
atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->remaining, 1);
atomic_set(&r1_bio->behind_remaining, 0); atomic_set(&r1_bio->behind_remaining, 0);
...@@ -878,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -878,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
mbio->bi_rw = WRITE | do_flush_fua | do_sync; mbio->bi_rw = WRITE | do_flush_fua | do_sync;
mbio->bi_private = r1_bio; mbio->bi_private = r1_bio;
if (behind_pages) { if (r1_bio->behind_pages) {
struct bio_vec *bvec; struct bio_vec *bvec;
int j; int j;
...@@ -890,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -890,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
* them all * them all
*/ */
__bio_for_each_segment(bvec, mbio, j, 0) __bio_for_each_segment(bvec, mbio, j, 0)
bvec->bv_page = behind_pages[j].bv_page; bvec->bv_page = r1_bio->behind_pages[j];
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
atomic_inc(&r1_bio->behind_remaining); atomic_inc(&r1_bio->behind_remaining);
} }
...@@ -900,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) ...@@ -900,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
bio_list_add(&conf->pending_bio_list, mbio); bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); r1_bio_write_done(r1_bio);
kfree(behind_pages); /* the behind pages are attached to the bios now */
/* In case raid1d snuck in to freeze_array */ /* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier); wake_up(&conf->wait_barrier);
......
...@@ -94,7 +94,9 @@ struct r1bio_s { ...@@ -94,7 +94,9 @@ struct r1bio_s {
int read_disk; int read_disk;
struct list_head retry_list; struct list_head retry_list;
struct bitmap_update *bitmap_update; /* Next two are only valid when R1BIO_BehindIO is set */
struct page **behind_pages;
int behind_page_count;
/* /*
* if the IO is in WRITE direction, then multiple bios are used. * if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated. * We choose the number when they are allocated.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment