Commit f8b46092 authored by Jens Axboe

[PATCH] bio_add_page()

This is bio_add_page(), 100% identical to the version I sent out for
comments earlier this week.  With the previous queue restriction patch,
this guarantees that we can always add a page worth of data to the bio.
bio_add_page() returns 0 on success, and 1 on failure.  Either the page
is added completely, or the attempt is aborted.
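
Since a failed add leaves the bio untouched, the caller can treat a
non-zero return as a plain "bio is full" signal. A minimal sketch of
that contract (not part of this patch, names illustrative):

	if (bio_add_page(bio, page, PAGE_SIZE, 0)) {
		/* page was NOT added, bio is unchanged: submit what we have */
		submit_bio(rw, bio);
	}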

bio_add_page() uses the normal queue restrictions to determine whether
we can add the page or not. If a queue has further restrictions, it can
define a q->merge_bvec_fn() to impose additional limits.
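
For illustration only (no driver in this patch does this), a striping
driver could refuse any vec entry that would make the bio cross a chunk
boundary. chunk_sectors is a made-up constant here, and a non-zero
return tells bio_add_page() to back the page out again:

	/* hypothetical hook: keep a bio inside one stripe chunk */
	static int stripe_merge_bvec(request_queue_t *q, struct bio *bio,
				     struct bio_vec *bvec)
	{
		sector_t first = bio->bi_sector;
		sector_t last = first + bio_sectors(bio) + (bvec->bv_len >> 9) - 1;

		if ((first / chunk_sectors) != (last / chunk_sectors))
			return 1;	/* would span two chunks, reject */

		return 0;
	}

with q->merge_bvec_fn = stripe_merge_bvec set at queue init time.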

The patch also includes changes to ll_rw_kio(), if for nothing else to
demonstrate how to use this piece of infrastructure.
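
The loop in ll_rw_kio() boils down to the pattern below, which is
roughly what any other caller would do (a sketch under assumed names,
error handling and the kiobuf offset handling omitted):

	bio = bio_alloc(GFP_NOIO, nr_pages);
	bio->bi_sector = sector;
	bio->bi_bdev = bdev;
	bio->bi_end_io = my_end_io;	/* made-up completion callback */

	for (i = 0; i < nr_pages; i++) {
		/* non-zero return: bio is full, stop filling it */
		if (bio_add_page(bio, pages[i], PAGE_SIZE, 0))
			break;
	}

	submit_bio(rw, bio);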
parent efbb72f9
fs/bio.c
@@ -46,13 +46,11 @@ struct biovec_pool {
  */
 #define BV(x) { x, "biovec-" #x }
 static struct biovec_pool bvec_array[BIOVEC_NR_POOLS] = {
-	BV(1), BV(4), BV(16), BV(64), BV(128), BV(256)
+	BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
 };
 #undef BV
 
-#define BIO_MAX_PAGES	(bvec_array[BIOVEC_NR_POOLS - 1].size)
-
 static void *slab_pool_alloc(int gfp_mask, void *data)
 {
 	return kmem_cache_alloc(data, gfp_mask);
@@ -77,7 +75,7 @@ static inline struct bio_vec *bvec_alloc(int gfp_mask, int nr, int *idx)
 		case 5 ... 16: *idx = 2; break;
 		case 17 ... 64: *idx = 3; break;
 		case 65 ... 128: *idx = 4; break;
-		case 129 ... 256: *idx = 5; break;
+		case 129 ... BIO_MAX_PAGES: *idx = 5; break;
 		default:
 			return NULL;
 	}
@@ -103,7 +101,7 @@ void bio_destructor(struct bio *bio)
 	/*
 	 * cloned bio doesn't own the veclist
 	 */
-	if (!(bio->bi_flags & (1 << BIO_CLONED)))
+	if (!bio_flagged(bio, BIO_CLONED))
 		mempool_free(bio->bi_io_vec, bp->pool);
 
 	mempool_free(bio, bio_pool);
@@ -180,7 +178,7 @@ void bio_put(struct bio *bio)
 inline int bio_phys_segments(request_queue_t *q, struct bio *bio)
 {
-	if (unlikely(!(bio->bi_flags & (1 << BIO_SEG_VALID))))
+	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
 		blk_recount_segments(q, bio);
 
 	return bio->bi_phys_segments;
@@ -188,7 +186,7 @@ inline int bio_phys_segments(request_queue_t *q, struct bio *bio)
 inline int bio_hw_segments(request_queue_t *q, struct bio *bio)
 {
-	if (unlikely(!(bio->bi_flags & (1 << BIO_SEG_VALID))))
+	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
 		blk_recount_segments(q, bio);
 
 	return bio->bi_hw_segments;
@@ -218,7 +216,7 @@ inline void __bio_clone(struct bio *bio, struct bio *bio_src)
 	 */
 	bio->bi_vcnt = bio_src->bi_vcnt;
 	bio->bi_idx = bio_src->bi_idx;
-	if (bio_src->bi_flags & (1 << BIO_SEG_VALID)) {
+	if (bio_flagged(bio, BIO_SEG_VALID)) {
 		bio->bi_phys_segments = bio_src->bi_phys_segments;
 		bio->bi_hw_segments = bio_src->bi_hw_segments;
 		bio->bi_flags |= (1 << BIO_SEG_VALID);
@@ -322,6 +320,87 @@ struct bio *bio_copy(struct bio *bio, int gfp_mask, int copy)
 	return NULL;
 }
 
+/**
+ * bio_add_page - attempt to add page to bio
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ *
+ * Attempt to add a page to the bio_vec maplist. This can fail for a
+ * number of reasons, such as the bio being full or target block
+ * device limitations.
+ */
+int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
+		 unsigned int offset)
+{
+	request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+	int fail_segments = 0, retried_segments = 0;
+	struct bio_vec *bvec;
+
+	/*
+	 * cloned bio must not modify vec list
+	 */
+	if (unlikely(bio_flagged(bio, BIO_CLONED)))
+		return 1;
+
+	/*
+	 * FIXME: change bi_max?
+	 */
+	BUG_ON(bio->bi_max > BIOVEC_NR_POOLS);
+
+	if (bio->bi_vcnt >= bvec_array[bio->bi_max].nr_vecs)
+		return 1;
+
+	if (((bio->bi_size + len) >> 9) > q->max_sectors)
+		return 1;
+
+	/*
+	 * we might lose a segment or two here, but better that than
+	 * making this too complex.
+	 */
+retry_segments:
+	if (bio_phys_segments(q, bio) >= q->max_phys_segments
+	    || bio_hw_segments(q, bio) >= q->max_hw_segments)
+		fail_segments = 1;
+
+	if (fail_segments) {
+		if (retried_segments)
+			return 1;
+
+		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+		retried_segments = 1;
+		goto retry_segments;
+	}
+
+	/*
+	 * setup the new entry, we might clear it again later if we
+	 * cannot add the page
+	 */
+	bvec = &bio->bi_io_vec[bio->bi_vcnt];
+	bvec->bv_page = page;
+	bvec->bv_len = len;
+	bvec->bv_offset = offset;
+
+	/*
+	 * if queue has other restrictions (eg varying max sector size
+	 * depending on offset), it can specify a merge_bvec_fn in the
+	 * queue to get further control
+	 */
+	if (q->merge_bvec_fn && q->merge_bvec_fn(q, bio, bvec)) {
+		bvec->bv_page = NULL;
+		bvec->bv_len = 0;
+		bvec->bv_offset = 0;
+		return 1;
+	}
+
+	bio->bi_vcnt++;
+	bio->bi_phys_segments++;
+	bio->bi_hw_segments++;
+	bio->bi_size += len;
+	return 0;
+}
+
 static void bio_end_io_kio(struct bio *bio)
 {
 	struct kiobuf *kio = (struct kiobuf *) bio->bi_private;
@@ -345,7 +424,6 @@ static void bio_end_io_kio(struct bio *bio)
 void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t sector)
 {
 	int i, offset, size, err, map_i, total_nr_pages, nr_pages;
-	struct bio_vec *bvec;
 	struct bio *bio;
 
 	err = 0;
@@ -373,7 +451,7 @@ void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t s
 	map_i = 0;
 
 next_chunk:
-	nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - 9);
+	nr_pages = BIO_MAX_PAGES;
 	if (nr_pages > total_nr_pages)
 		nr_pages = total_nr_pages;
@@ -393,8 +471,7 @@ void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t s
 		bio->bi_end_io = bio_end_io_kio;
 		bio->bi_private = kio;
 
-		bvec = bio->bi_io_vec;
-		for (i = 0; i < nr_pages; i++, bvec++, map_i++) {
+		for (i = 0; i < nr_pages; i++, map_i++) {
 			int nbytes = PAGE_SIZE - offset;
 
 			if (nbytes > size)
@@ -402,15 +479,12 @@ void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t s
 			BUG_ON(kio->maplist[map_i] == NULL);
 
-			if (bio->bi_size + nbytes > (BIO_MAX_SECTORS << 9))
-				goto queue_io;
-
-			bio->bi_vcnt++;
-			bio->bi_size += nbytes;
-
-			bvec->bv_page = kio->maplist[map_i];
-			bvec->bv_len = nbytes;
-			bvec->bv_offset = offset;
+			/*
+			 * if we can't add this page to the bio, submit for i/o
+			 * and alloc a new one if needed
+			 */
+			if (bio_add_page(bio, kio->maplist[map_i], nbytes, offset))
+				break;
 
 			/*
 			 * kiobuf only has an offset into the first page
@@ -423,7 +497,6 @@ void ll_rw_kio(int rw, struct kiobuf *kio, struct block_device *bdev, sector_t s
 			kio->offset += nbytes;
 		}
 
-queue_io:
 		submit_bio(rw, bio);
 
 		if (total_nr_pages)
@@ -538,3 +611,4 @@ EXPORT_SYMBOL(__bio_clone);
 EXPORT_SYMBOL(bio_clone);
 EXPORT_SYMBOL(bio_phys_segments);
 EXPORT_SYMBOL(bio_hw_segments);
+EXPORT_SYMBOL(bio_add_page);
include/linux/bio.h
@@ -37,8 +37,9 @@
 #define BIO_BUG_ON
 #endif
 
-#define BIO_MAX_SECTORS	128
-#define BIO_MAX_SIZE	(BIO_MAX_SECTORS << 9)
+#define BIO_MAX_PAGES	(256)
+#define BIO_MAX_SIZE	(BIO_MAX_PAGES << PAGE_CACHE_SHIFT)
+#define BIO_MAX_SECTORS	(BIO_MAX_SIZE >> 9)
 
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -101,6 +102,7 @@ struct bio {
 #define BIO_EOF		2	/* out-of-bounds error */
 #define BIO_SEG_VALID	3	/* nr_hw_seg valid */
 #define BIO_CLONED	4	/* doesn't own data */
+#define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
 /*
  * bio bi_rw flags
@@ -123,7 +125,7 @@ struct bio {
 #define bio_offset(bio)	bio_iovec((bio))->bv_offset
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
 #define bio_data(bio)	(page_address(bio_page((bio))) + bio_offset((bio)))
-#define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_BARRIER))
+#define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 
 /*
  * will die
@@ -203,6 +205,8 @@ extern struct bio *bio_copy(struct bio *, int, int);
 extern inline void bio_init(struct bio *);
 
+extern int bio_add_page(struct bio *, struct page *, unsigned int, unsigned int);
+
 #ifdef CONFIG_HIGHMEM
 /*
  * remember to add offset! and never ever reenable interrupts between a
include/linux/blkdev.h
@@ -120,6 +120,9 @@ typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
 typedef int (prep_rq_fn) (request_queue_t *, struct request *);
 typedef void (unplug_fn) (void *q);
 
+struct bio_vec;
+typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
+
 enum blk_queue_state {
 	Queue_down,
 	Queue_up,
@@ -163,6 +166,7 @@ struct request_queue
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
+	merge_bvec_fn		*merge_bvec_fn;
 
 	struct backing_dev_info	backing_dev_info;