Commit 313d90b7 authored by Neil Brown's avatar Neil Brown Committed by Christoph Hellwig

[PATCH] Initial md/raid5 support for 2.5 (with bio)

With this patch raid5 works.  There is still some more
work to do though.

- uses bio instead of buffer_head
- stripe cache is now a fixed size.
   If read requests are smaller, we read the whole block anyway
   If write requests are smaller, we pre-read.
- stripe_head is now variable sized with an array of structures at
  the end.  We allocate extra space depending on how many devices
  are in the array.
  stripe_head has its very own slab cache.
- store and use bdev for each device in array

Bypassing the cache for reads is currently disabled.  I need to
think through the implications (and implementation) of allowing
bios that are larger than the stripe cache to go directly
to the device (if it hasn't failed, of course).
parent 7d684b93
This diff is collapsed.
......@@ -26,31 +26,30 @@
static struct xor_block_template *active_template;
void
xor_block(unsigned int count, struct buffer_head **bh_ptr)
xor_block(unsigned int count, unsigned int bytes, void **ptr)
{
unsigned long *p0, *p1, *p2, *p3, *p4;
unsigned long bytes = bh_ptr[0]->b_size;
p0 = (unsigned long *) bh_ptr[0]->b_data;
p1 = (unsigned long *) bh_ptr[1]->b_data;
p0 = (unsigned long *) ptr[0];
p1 = (unsigned long *) ptr[1];
if (count == 2) {
active_template->do_2(bytes, p0, p1);
return;
}
p2 = (unsigned long *) bh_ptr[2]->b_data;
p2 = (unsigned long *) ptr[2];
if (count == 3) {
active_template->do_3(bytes, p0, p1, p2);
return;
}
p3 = (unsigned long *) bh_ptr[3]->b_data;
p3 = (unsigned long *) ptr[3];
if (count == 4) {
active_template->do_4(bytes, p0, p1, p2, p3);
return;
}
p4 = (unsigned long *) bh_ptr[4]->b_data;
p4 = (unsigned long *) ptr[4];
active_template->do_5(bytes, p0, p1, p2, p3, p4);
}
......
......@@ -7,21 +7,21 @@
/*
*
* Each stripe contains one buffer per disc. Each buffer can be in
* one of a number of states determined by bh_state. Changes between
* one of a number of states stored in "flags". Changes between
* these states happen *almost* exclusively under a per-stripe
* spinlock. Some very specific changes can happen in b_end_io, and
* spinlock. Some very specific changes can happen in bi_end_io, and
* these are not protected by the spin lock.
*
* The bh_state bits that are used to represent these states are:
* BH_Uptodate, BH_Lock
* The flag bits that are used to represent these states are:
* R5_UPTODATE and R5_LOCKED
*
* State Empty == !Uptodate, !Lock
* State Empty == !UPTODATE, !LOCK
* We have no data, and there is no active request
* State Want == !Uptodate, Lock
* State Want == !UPTODATE, LOCK
* A read request is being submitted for this block
* State Dirty == Uptodate, Lock
* State Dirty == UPTODATE, LOCK
* Some new data is in this buffer, and it is being written out
* State Clean == Uptodate, !Lock
* State Clean == UPTODATE, !LOCK
* We have valid data which is the same as on disc
*
* The possible state transitions are:
......@@ -124,24 +124,29 @@
* plus raid5d if it is handling it, plus one for each active request
* on a cached buffer.
*/
struct stripe_head {
struct stripe_head *hash_next, **hash_pprev; /* hash pointers */
struct list_head lru; /* inactive_list or handle_list */
struct raid5_private_data *raid_conf;
struct buffer_head *bh_cache[MD_SB_DISKS]; /* buffered copy */
struct buffer_head *bh_read[MD_SB_DISKS]; /* read request buffers of the MD device */
struct buffer_head *bh_write[MD_SB_DISKS]; /* write request buffers of the MD device */
struct buffer_head *bh_written[MD_SB_DISKS]; /* write request buffers of the MD device that have been scheduled for write */
struct page *bh_page[MD_SB_DISKS]; /* saved bh_cache[n]->b_page when reading around the cache */
unsigned long sector; /* sector of this row */
int size; /* buffers size */
sector_t sector; /* sector of this row */
int pd_idx; /* parity disk index */
unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */
spinlock_t lock;
int sync_redone;
struct r5dev {
struct bio req;
struct bio_vec vec;
struct page *page;
struct bio *toread, *towrite, *written;
sector_t sector; /* sector of this page */
unsigned long flags;
} dev[1]; /* allocated with extra space depending of RAID geometry */
};
/* Flags */
#define R5_UPTODATE 0 /* page contains current data */
#define R5_LOCKED 1 /* IO has been submitted on "req" */
#define R5_OVERWRITE 2 /* towrite covers whole page */
/*
* Write method
......@@ -187,6 +192,7 @@ struct stripe_head {
struct disk_info {
kdev_t dev;
struct block_device *bdev;
int operational;
int number;
int raid_disk;
......@@ -201,7 +207,6 @@ struct raid5_private_data {
mdk_thread_t *thread, *resync_thread;
struct disk_info disks[MD_SB_DISKS];
struct disk_info *spare;
int buffer_size;
int chunk_size, level, algorithm;
int raid_disks, working_disks, failed_disks;
int resync_parity;
......@@ -210,6 +215,9 @@ struct raid5_private_data {
struct list_head handle_list; /* stripes needing handling */
struct list_head delayed_list; /* stripes that have plugged requests */
atomic_t preread_active_stripes; /* stripes with scheduled io */
char cache_name[20];
kmem_cache_t *slab_cache; /* for allocating stripes */
/*
* Free stripes pool
*/
......
......@@ -5,7 +5,7 @@
#define MAX_XOR_BLOCKS 5
extern void xor_block(unsigned int count, struct buffer_head **bh_ptr);
extern void xor_block(unsigned int count, unsigned int bytes, void **ptr);
struct xor_block_template {
struct xor_block_template *next;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment