Commit 67a670bd authored by Linus Torvalds

Merge http://linux-isdn.bkbits.net/linux-2.5.make-next

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 8876c643 66ebd50b
...@@ -436,14 +436,15 @@ static int alloc_array_sb(mddev_t * mddev) ...@@ -436,14 +436,15 @@ static int alloc_array_sb(mddev_t * mddev)
static int alloc_disk_sb(mdk_rdev_t * rdev) static int alloc_disk_sb(mdk_rdev_t * rdev)
{ {
if (rdev->sb) if (rdev->sb_page)
MD_BUG(); MD_BUG();
rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL); rdev->sb_page = alloc_page(GFP_KERNEL);
if (!rdev->sb) { if (!rdev->sb_page) {
printk(OUT_OF_MEM); printk(OUT_OF_MEM);
return -EINVAL; return -EINVAL;
} }
rdev->sb = (mdp_super_t *) page_address(rdev->sb_page);
clear_page(rdev->sb); clear_page(rdev->sb);
return 0; return 0;
...@@ -451,9 +452,10 @@ static int alloc_disk_sb(mdk_rdev_t * rdev) ...@@ -451,9 +452,10 @@ static int alloc_disk_sb(mdk_rdev_t * rdev)
static void free_disk_sb(mdk_rdev_t * rdev) static void free_disk_sb(mdk_rdev_t * rdev)
{ {
if (rdev->sb) { if (rdev->sb_page) {
free_page((unsigned long) rdev->sb); page_cache_release(rdev->sb_page);
rdev->sb = NULL; rdev->sb = NULL;
rdev->sb_page = NULL;
rdev->sb_offset = 0; rdev->sb_offset = 0;
rdev->size = 0; rdev->size = 0;
} else { } else {
...@@ -462,13 +464,42 @@ static void free_disk_sb(mdk_rdev_t * rdev) ...@@ -462,13 +464,42 @@ static void free_disk_sb(mdk_rdev_t * rdev)
} }
} }
/*
 * bio end_io callback: signal the completion whose address was stored
 * in bio->bi_private by the submitter.
 */
static void bi_complete(struct bio *bio)
{
	struct completion *done = (struct completion *)bio->bi_private;

	complete(done);
}
/*
 * Perform one synchronous single-segment transfer between @page and @bdev.
 *
 * @bdev:   block device the bio is directed at
 * @sector: starting sector, stored in bi_sector
 * @size:   number of bytes to transfer, starting at offset 0 of @page
 * @page:   page supplying or receiving the data
 * @rw:     direction, handed unchanged to submit_bio()
 *
 * The bio, its single bio_vec and the completion all live on this
 * function's stack; bi_complete() signals the completion when the bio ends.
 *
 * Returns the result of testing BIO_UPTODATE in the bio's flags after the
 * wait: non-zero when the bit is set, zero otherwise.
 */
static int sync_page_io(struct block_device *bdev, sector_t sector, int size,
			struct page *page, int rw)
{
	struct completion done;
	struct bio_vec segment;
	struct bio request;

	/* the one and only segment: @size bytes from the start of @page */
	segment.bv_page = page;
	segment.bv_offset = 0;
	segment.bv_len = size;

	bio_init(&request);
	request.bi_bdev = bdev;
	request.bi_sector = sector;
	request.bi_size = size;
	request.bi_io_vec = &segment;
	request.bi_vcnt = 1;
	request.bi_idx = 0;

	/* completion must be ready before the bio can possibly finish */
	init_completion(&done);
	request.bi_end_io = bi_complete;
	request.bi_private = &done;

	submit_bio(rw, &request);
	/* run the tq_disk task queue before going to sleep on the completion */
	run_task_queue(&tq_disk);
	wait_for_completion(&done);

	return test_bit(BIO_UPTODATE, &request.bi_flags);
}
static int read_disk_sb(mdk_rdev_t * rdev) static int read_disk_sb(mdk_rdev_t * rdev)
{ {
struct address_space *mapping = rdev->bdev->bd_inode->i_mapping;
struct page *page;
char *p;
unsigned long sb_offset; unsigned long sb_offset;
int n = PAGE_CACHE_SIZE / BLOCK_SIZE;
if (!rdev->sb) { if (!rdev->sb) {
MD_BUG(); MD_BUG();
...@@ -483,24 +514,14 @@ static int read_disk_sb(mdk_rdev_t * rdev) ...@@ -483,24 +514,14 @@ static int read_disk_sb(mdk_rdev_t * rdev)
*/ */
sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1); sb_offset = calc_dev_sboffset(rdev->dev, rdev->mddev, 1);
rdev->sb_offset = sb_offset; rdev->sb_offset = sb_offset;
page = read_cache_page(mapping, sb_offset/n,
(filler_t *)mapping->a_ops->readpage, NULL); if (!sync_page_io(rdev->bdev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ))
if (IS_ERR(page))
goto out;
wait_on_page_locked(page);
if (!PageUptodate(page))
goto fail;
if (PageError(page))
goto fail; goto fail;
p = (char *)page_address(page) + BLOCK_SIZE * (sb_offset % n);
memcpy((char*)rdev->sb, p, MD_SB_BYTES);
page_cache_release(page);
printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo); printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
return 0; return 0;
fail: fail:
page_cache_release(page);
out:
printk(NO_SB,partition_name(rdev->dev)); printk(NO_SB,partition_name(rdev->dev));
return -EINVAL; return -EINVAL;
} }
...@@ -893,11 +914,6 @@ static mdk_rdev_t * find_rdev_all(kdev_t dev) ...@@ -893,11 +914,6 @@ static mdk_rdev_t * find_rdev_all(kdev_t dev)
static int write_disk_sb(mdk_rdev_t * rdev) static int write_disk_sb(mdk_rdev_t * rdev)
{ {
struct block_device *bdev = rdev->bdev;
struct address_space *mapping = bdev->bd_inode->i_mapping;
struct page *page;
unsigned offs;
int error;
kdev_t dev = rdev->dev; kdev_t dev = rdev->dev;
unsigned long sb_offset, size; unsigned long sb_offset, size;
...@@ -933,29 +949,11 @@ static int write_disk_sb(mdk_rdev_t * rdev) ...@@ -933,29 +949,11 @@ static int write_disk_sb(mdk_rdev_t * rdev)
} }
printk(KERN_INFO "(write) %s's sb offset: %ld\n", partition_name(dev), sb_offset); printk(KERN_INFO "(write) %s's sb offset: %ld\n", partition_name(dev), sb_offset);
fsync_bdev(bdev);
page = grab_cache_page(mapping, sb_offset/(PAGE_CACHE_SIZE/BLOCK_SIZE)); if (!sync_page_io(rdev->bdev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, WRITE))
offs = sb_offset % (PAGE_CACHE_SIZE/BLOCK_SIZE);
if (!page)
goto fail; goto fail;
error = mapping->a_ops->prepare_write(NULL, page, offs,
offs + MD_SB_BYTES);
if (error)
goto unlock;
memcpy((char *)page_address(page) + offs, rdev->sb, MD_SB_BYTES);
error = mapping->a_ops->commit_write(NULL, page, offs,
offs + MD_SB_BYTES);
if (error)
goto unlock;
unlock_page(page);
wait_on_page_locked(page);
page_cache_release(page);
fsync_bdev(bdev);
skip: skip:
return 0; return 0;
unlock:
unlock_page(page);
page_cache_release(page);
fail: fail:
printk("md: write_disk_sb failed for device %s\n", partition_name(dev)); printk("md: write_disk_sb failed for device %s\n", partition_name(dev));
return 1; return 1;
......
...@@ -24,19 +24,19 @@ ...@@ -24,19 +24,19 @@
#include <asm/bitops.h> #include <asm/bitops.h>
#include <asm/atomic.h> #include <asm/atomic.h>
static mdk_personality_t raid5_personality;
/* /*
* Stripe cache * Stripe cache
*/ */
#define NR_STRIPES 256 #define NR_STRIPES 256
#define STRIPE_SIZE PAGE_SIZE
#define STRIPE_SECTORS (STRIPE_SIZE>>9)
#define IO_THRESHOLD 1 #define IO_THRESHOLD 1
#define HASH_PAGES 1 #define HASH_PAGES 1
#define HASH_PAGES_ORDER 0 #define HASH_PAGES_ORDER 0
#define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) #define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))
#define HASH_MASK (NR_HASH - 1) #define HASH_MASK (NR_HASH - 1)
#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) / ((conf)->buffer_size >> 9)) & HASH_MASK]) #define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) / STRIPE_SECTORS) & HASH_MASK])
/* /*
* The following can be used to debug the driver * The following can be used to debug the driver
...@@ -142,47 +142,36 @@ static struct stripe_head *get_free_stripe(raid5_conf_t *conf) ...@@ -142,47 +142,36 @@ static struct stripe_head *get_free_stripe(raid5_conf_t *conf)
static void shrink_buffers(struct stripe_head *sh, int num) static void shrink_buffers(struct stripe_head *sh, int num)
{ {
struct buffer_head *bh; struct page *p;
int i; int i;
for (i=0; i<num ; i++) { for (i=0; i<num ; i++) {
bh = sh->bh_cache[i]; p = sh->dev[i].page;
if (!bh) if (!p)
return; continue;
sh->bh_cache[i] = NULL; sh->dev[i].page = NULL;
free_page((unsigned long) bh->b_data); page_cache_release(p);
kfree(bh);
} }
} }
static int grow_buffers(struct stripe_head *sh, int num, int b_size, int priority) static int grow_buffers(struct stripe_head *sh, int num)
{ {
struct buffer_head *bh;
int i; int i;
for (i=0; i<num; i++) { for (i=0; i<num; i++) {
struct page *page; struct page *page;
bh = kmalloc(sizeof(struct buffer_head), priority);
if (!bh) if (!(page = alloc_page(GFP_KERNEL))) {
return 1;
memset(bh, 0, sizeof (struct buffer_head));
if ((page = alloc_page(priority)))
bh->b_data = page_address(page);
else {
kfree(bh);
return 1; return 1;
} }
atomic_set(&bh->b_count, 0); sh->dev[i].page = page;
bh->b_page = page;
sh->bh_cache[i] = bh;
} }
return 0; return 0;
} }
static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i); static void raid5_build_block (struct stripe_head *sh, int i);
static inline void init_stripe(struct stripe_head *sh, unsigned long sector) static inline void init_stripe(struct stripe_head *sh, unsigned long sector, int pd_idx)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks, i; int disks = conf->raid_disks, i;
...@@ -198,40 +187,26 @@ static inline void init_stripe(struct stripe_head *sh, unsigned long sector) ...@@ -198,40 +187,26 @@ static inline void init_stripe(struct stripe_head *sh, unsigned long sector)
remove_hash(sh); remove_hash(sh);
sh->sector = sector; sh->sector = sector;
sh->size = conf->buffer_size; sh->pd_idx = pd_idx;
sh->state = 0; sh->state = 0;
for (i=disks; i--; ) { for (i=disks; i--; ) {
if (sh->bh_read[i] || sh->bh_write[i] || sh->bh_written[i] || struct r5dev *dev = &sh->dev[i];
buffer_locked(sh->bh_cache[i])) {
if (dev->toread || dev->towrite || dev->written ||
test_bit(R5_LOCKED, &dev->flags)) {
printk("sector=%lx i=%d %p %p %p %d\n", printk("sector=%lx i=%d %p %p %p %d\n",
sh->sector, i, sh->bh_read[i], sh->sector, i, dev->toread,
sh->bh_write[i], sh->bh_written[i], dev->towrite, dev->written,
buffer_locked(sh->bh_cache[i])); test_bit(R5_LOCKED, &dev->flags));
BUG(); BUG();
} }
clear_buffer_uptodate(sh->bh_cache[i]); dev->flags = 0;
raid5_build_block(sh, i); raid5_build_block(sh, i);
} }
insert_hash(conf, sh); insert_hash(conf, sh);
} }
/*
 * Empty the stripe hash table: every bucket is drained by calling
 * remove_hash() on its head entry until the bucket reads back as NULL.
 *
 * Original note: this is used when the buffer size has changed; as
 * active stripes complete, they will go onto the inactive list.
 *
 * It is a BUG() to call this while conf->active_stripes is non-zero.
 */
static void shrink_stripe_cache(raid5_conf_t *conf)
{
	struct stripe_head *head;
	int bucket;

	CHECK_DEVLOCK();
	if (atomic_read(&conf->active_stripes) != 0)
		BUG();

	for (bucket = 0; bucket < NR_HASH; bucket++) {
		/* re-read the bucket head after each removal */
		for (head = conf->stripe_hashtbl[bucket];
		     head != NULL;
		     head = conf->stripe_hashtbl[bucket])
			remove_hash(head);
	}
}
static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector) static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector)
{ {
struct stripe_head *sh; struct stripe_head *sh;
...@@ -245,53 +220,16 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long secto ...@@ -245,53 +220,16 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long secto
return NULL; return NULL;
} }
static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long sector, int size, int noblock) static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long sector,
int pd_idx, int noblock)
{ {
struct stripe_head *sh; struct stripe_head *sh;
PRINTK("get_stripe, sector %lu\n", sector); PRINTK("get_stripe, sector %lu\n", sector);
md_spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
do { do {
if (conf->buffer_size == 0 ||
(size && size != conf->buffer_size)) {
/* either the size is being changed (buffer_size==0) or
* we need to change it.
* If size==0, we can proceed as soon as buffer_size gets set.
* If size>0, we can proceed when active_stripes reaches 0, or
* when someone else sets the buffer_size to size.
* If someone sets the buffer size to something else, we will need to
* assert that we want to change it again
*/
int oldsize = conf->buffer_size;
PRINTK("get_stripe %ld/%d buffer_size is %d, %d active\n", sector, size, conf->buffer_size, atomic_read(&conf->active_stripes));
if (size==0)
wait_event_lock_irq(conf->wait_for_stripe,
conf->buffer_size,
conf->device_lock);
else {
while (conf->buffer_size != size && atomic_read(&conf->active_stripes)) {
conf->buffer_size = 0;
wait_event_lock_irq(conf->wait_for_stripe,
atomic_read(&conf->active_stripes)==0 || conf->buffer_size,
conf->device_lock);
PRINTK("waited and now %ld/%d buffer_size is %d - %d active\n", sector, size,
conf->buffer_size, atomic_read(&conf->active_stripes));
}
if (conf->buffer_size != size) {
printk("raid5: switching cache buffer size, %d --> %d\n", oldsize, size);
shrink_stripe_cache(conf);
if (size==0) BUG();
conf->buffer_size = size;
PRINTK("size now %d\n", conf->buffer_size);
}
}
}
if (size == 0)
sector -= sector & ((conf->buffer_size>>9)-1);
sh = __find_stripe(conf, sector); sh = __find_stripe(conf, sector);
if (!sh) { if (!sh) {
if (!conf->inactive_blocked) if (!conf->inactive_blocked)
...@@ -307,7 +245,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long s ...@@ -307,7 +245,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long s
conf->device_lock); conf->device_lock);
conf->inactive_blocked = 0; conf->inactive_blocked = 0;
} else } else
init_stripe(sh, sector); init_stripe(sh, sector, pd_idx);
} else { } else {
if (atomic_read(&sh->count)) { if (atomic_read(&sh->count)) {
if (!list_empty(&sh->lru)) if (!list_empty(&sh->lru))
...@@ -325,25 +263,35 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long s ...@@ -325,25 +263,35 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long s
if (sh) if (sh)
atomic_inc(&sh->count); atomic_inc(&sh->count);
md_spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
return sh; return sh;
} }
static int grow_stripes(raid5_conf_t *conf, int num, int priority) static int grow_stripes(raid5_conf_t *conf, int num)
{ {
struct stripe_head *sh; struct stripe_head *sh;
kmem_cache_t *sc;
int devs = conf->raid_disks;
sprintf(conf->cache_name, "md/raid5-%d", conf->mddev->__minor);
sc = kmem_cache_create(conf->cache_name,
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
0, 0, NULL, NULL);
if (!sc)
return 1;
conf->slab_cache = sc;
while (num--) { while (num--) {
sh = kmalloc(sizeof(struct stripe_head), priority); sh = kmem_cache_alloc(sc, GFP_KERNEL);
if (!sh) if (!sh)
return 1; return 1;
memset(sh, 0, sizeof(*sh)); memset(sh, 0, sizeof(*sh) + (devs-1)*sizeof(struct r5dev));
sh->raid_conf = conf; sh->raid_conf = conf;
sh->lock = SPIN_LOCK_UNLOCKED; sh->lock = SPIN_LOCK_UNLOCKED;
if (grow_buffers(sh, conf->raid_disks, PAGE_SIZE, priority)) { if (grow_buffers(sh, conf->raid_disks)) {
shrink_buffers(sh, conf->raid_disks); shrink_buffers(sh, conf->raid_disks);
kfree(sh); kmem_cache_free(sc, sh);
return 1; return 1;
} }
/* we just created an active stripe so... */ /* we just created an active stripe so... */
...@@ -355,11 +303,11 @@ static int grow_stripes(raid5_conf_t *conf, int num, int priority) ...@@ -355,11 +303,11 @@ static int grow_stripes(raid5_conf_t *conf, int num, int priority)
return 0; return 0;
} }
static void shrink_stripes(raid5_conf_t *conf, int num) static void shrink_stripes(raid5_conf_t *conf)
{ {
struct stripe_head *sh; struct stripe_head *sh;
while (num--) { while (1) {
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
sh = get_free_stripe(conf); sh = get_free_stripe(conf);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
...@@ -368,21 +316,22 @@ static void shrink_stripes(raid5_conf_t *conf, int num) ...@@ -368,21 +316,22 @@ static void shrink_stripes(raid5_conf_t *conf, int num)
if (atomic_read(&sh->count)) if (atomic_read(&sh->count))
BUG(); BUG();
shrink_buffers(sh, conf->raid_disks); shrink_buffers(sh, conf->raid_disks);
kfree(sh); kmem_cache_free(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes); atomic_dec(&conf->active_stripes);
} }
kmem_cache_destroy(conf->slab_cache);
conf->slab_cache = NULL;
} }
static void raid5_end_read_request (struct bio * bi)
static void raid5_end_read_request (struct buffer_head * bh, int uptodate)
{ {
struct stripe_head *sh = bh->b_private; struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks, i; int disks = conf->raid_disks, i;
unsigned long flags; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
for (i=0 ; i<disks; i++) for (i=0 ; i<disks; i++)
if (bh == sh->bh_cache[i]) if (bi == &sh->dev[i].req)
break; break;
PRINTK("end_read_request %lu/%d, count: %d, uptodate %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate); PRINTK("end_read_request %lu/%d, count: %d, uptodate %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate);
...@@ -392,7 +341,9 @@ static void raid5_end_read_request (struct buffer_head * bh, int uptodate) ...@@ -392,7 +341,9 @@ static void raid5_end_read_request (struct buffer_head * bh, int uptodate)
} }
if (uptodate) { if (uptodate) {
struct buffer_head *buffer; #if 0
struct bio *bio;
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
/* we can return a buffer if we bypassed the cache or /* we can return a buffer if we bypassed the cache or
* if the top buffer is not in highmem. If there are * if the top buffer is not in highmem. If there are
...@@ -409,38 +360,43 @@ static void raid5_end_read_request (struct buffer_head * bh, int uptodate) ...@@ -409,38 +360,43 @@ static void raid5_end_read_request (struct buffer_head * bh, int uptodate)
} else } else
buffer = NULL; buffer = NULL;
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
if (sh->bh_page[i]==NULL) if (sh->bh_page[i]==bh->b_page)
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
if (buffer) { if (buffer) {
if (buffer->b_page != bh->b_page) if (buffer->b_page != bh->b_page)
memcpy(buffer->b_data, bh->b_data, bh->b_size); memcpy(buffer->b_data, bh->b_data, bh->b_size);
buffer->b_end_io(buffer, 1); buffer->b_end_io(buffer, 1);
} }
#else
set_bit(R5_UPTODATE, &sh->dev[i].flags);
#endif
} else { } else {
md_error(conf->mddev, bh->b_bdev); md_error(conf->mddev, bi->bi_bdev);
clear_buffer_uptodate(bh); clear_bit(R5_UPTODATE, &sh->dev[i].flags);
} }
#if 0
/* must restore b_page before unlocking buffer... */ /* must restore b_page before unlocking buffer... */
if (sh->bh_page[i]) { if (sh->bh_page[i] != bh->b_page) {
bh->b_page = sh->bh_page[i]; bh->b_page = sh->bh_page[i];
bh->b_data = page_address(bh->b_page); bh->b_data = page_address(bh->b_page);
sh->bh_page[i] = NULL;
clear_buffer_uptodate(bh); clear_buffer_uptodate(bh);
} }
clear_buffer_locked(bh); #endif
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh); release_stripe(sh);
} }
static void raid5_end_write_request (struct buffer_head *bh, int uptodate) static void raid5_end_write_request (struct bio *bi)
{ {
struct stripe_head *sh = bh->b_private; struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks, i; int disks = conf->raid_disks, i;
unsigned long flags; unsigned long flags;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
for (i=0 ; i<disks; i++) for (i=0 ; i<disks; i++)
if (bh == sh->bh_cache[i]) if (bi == &sh->dev[i].req)
break; break;
PRINTK("end_write_request %lu/%d, count %d, uptodate: %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate); PRINTK("end_write_request %lu/%d, count %d, uptodate: %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate);
...@@ -449,41 +405,48 @@ static void raid5_end_write_request (struct buffer_head *bh, int uptodate) ...@@ -449,41 +405,48 @@ static void raid5_end_write_request (struct buffer_head *bh, int uptodate)
return; return;
} }
md_spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
if (!uptodate) if (!uptodate)
md_error(conf->mddev, bh->b_bdev); md_error(conf->mddev, bi->bi_bdev);
clear_buffer_locked(bh);
clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
__release_stripe(conf, sh); __release_stripe(conf, sh);
md_spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
} }
static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i) static unsigned long compute_blocknr(struct stripe_head *sh, int i);
static void raid5_build_block (struct stripe_head *sh, int i)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
struct buffer_head *bh = sh->bh_cache[i]; struct r5dev *dev = &sh->dev[i];
unsigned long block = sh->sector / (sh->size >> 9);
bio_init(&dev->req);
init_buffer(bh, raid5_end_read_request, sh); dev->req.bi_io_vec = &dev->vec;
bh->b_dev = conf->disks[i].dev; dev->req.bi_vcnt++;
/* FIXME - later we will need bdev here */ dev->vec.bv_page = dev->page;
bh->b_blocknr = block; dev->vec.bv_len = STRIPE_SIZE;
dev->vec.bv_offset = 0;
bh->b_state = (1 << BH_Req) | (1 << BH_Mapped);
bh->b_size = sh->size; dev->req.bi_bdev = conf->disks[i].bdev;
return bh; dev->req.bi_sector = sh->sector;
dev->req.bi_private = sh;
dev->flags = 0;
if (i != sh->pd_idx)
dev->sector = compute_blocknr(sh, i);
} }
static int raid5_error (mddev_t *mddev, kdev_t dev) static int error (mddev_t *mddev, kdev_t dev)
{ {
raid5_conf_t *conf = (raid5_conf_t *) mddev->private; raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
mdp_super_t *sb = mddev->sb; mdp_super_t *sb = mddev->sb;
struct disk_info *disk; struct disk_info *disk;
int i; int i;
PRINTK("raid5_error called\n"); PRINTK("raid5: error called\n");
for (i = 0, disk = conf->disks; i < conf->raid_disks; i++, disk++) { for (i = 0, disk = conf->disks; i < conf->raid_disks; i++, disk++) {
if (kdev_same(disk->dev, dev)) { if (kdev_same(disk->dev, dev)) {
...@@ -544,14 +507,14 @@ static int raid5_error (mddev_t *mddev, kdev_t dev) ...@@ -544,14 +507,14 @@ static int raid5_error (mddev_t *mddev, kdev_t dev)
* Input: a 'big' sector number, * Input: a 'big' sector number,
* Output: index of the data and parity disk, and the sector # in them. * Output: index of the data and parity disk, and the sector # in them.
*/ */
static unsigned long raid5_compute_sector(unsigned long r_sector, unsigned int raid_disks, static unsigned long raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
unsigned int data_disks, unsigned int * dd_idx, unsigned int data_disks, unsigned int * dd_idx,
unsigned int * pd_idx, raid5_conf_t *conf) unsigned int * pd_idx, raid5_conf_t *conf)
{ {
unsigned long stripe; sector_t stripe;
unsigned long chunk_number; unsigned long chunk_number;
unsigned int chunk_offset; unsigned int chunk_offset;
unsigned long new_sector; sector_t new_sector;
int sectors_per_chunk = conf->chunk_size >> 9; int sectors_per_chunk = conf->chunk_size >> 9;
/* First compute the information on this sector */ /* First compute the information on this sector */
...@@ -607,17 +570,17 @@ static unsigned long raid5_compute_sector(unsigned long r_sector, unsigned int r ...@@ -607,17 +570,17 @@ static unsigned long raid5_compute_sector(unsigned long r_sector, unsigned int r
return new_sector; return new_sector;
} }
#if 0
static unsigned long compute_blocknr(struct stripe_head *sh, int i) static sector_t compute_blocknr(struct stripe_head *sh, int i)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int raid_disks = conf->raid_disks, data_disks = raid_disks - 1; int raid_disks = conf->raid_disks, data_disks = raid_disks - 1;
unsigned long new_sector = sh->sector, check; sector_t new_sector = sh->sector, check;
int sectors_per_chunk = conf->chunk_size >> 9; int sectors_per_chunk = conf->chunk_size >> 9;
unsigned long stripe = new_sector / sectors_per_chunk; sector_t stripe = new_sector / sectors_per_chunk;
int chunk_offset = new_sector % sectors_per_chunk; int chunk_offset = new_sector % sectors_per_chunk;
int chunk_number, dummy1, dummy2, dd_idx = i; int chunk_number, dummy1, dummy2, dd_idx = i;
unsigned long r_sector, blocknr; sector_t r_sector;
switch (conf->algorithm) { switch (conf->algorithm) {
case ALGORITHM_LEFT_ASYMMETRIC: case ALGORITHM_LEFT_ASYMMETRIC:
...@@ -637,22 +600,72 @@ static unsigned long compute_blocknr(struct stripe_head *sh, int i) ...@@ -637,22 +600,72 @@ static unsigned long compute_blocknr(struct stripe_head *sh, int i)
chunk_number = stripe * data_disks + i; chunk_number = stripe * data_disks + i;
r_sector = chunk_number * sectors_per_chunk + chunk_offset; r_sector = chunk_number * sectors_per_chunk + chunk_offset;
blocknr = r_sector / (sh->size >> 9);
check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf); check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
printk("compute_blocknr: map not correct\n"); printk("compute_blocknr: map not correct\n");
return 0; return 0;
} }
return blocknr; return r_sector;
} }
#endif
#define check_xor() do { \
if (count == MAX_XOR_BLOCKS) { \
xor_block(count, bh_ptr); \ /*
count = 1; \ * Copy data between a page in the stripe cache, and one or more bion
} \ * The page could align with the middle of the bio, or there could be
* several bion, each with several bio_vecs, which cover part of the page
* Multiple bion are linked together on bi_next. There may be extras
* at the end of this list. We ignore them.
*/
/*
 * copy_data - move bytes between a stripe-cache page and a chain of bios.
 * @frombio: non-zero copies from the bio segments into @page;
 *           zero copies from @page out to the bio segments
 * @bio:     first bio of a chain linked through bi_next
 * @page:    stripe-cache page; it covers STRIPE_SECTORS sectors
 * @sector:  sector number corresponding to byte 0 of @page
 *
 * The chain is walked until it ends or a bio starts at or beyond the end
 * of the page.  Within each bio only the bytes that overlap the page are
 * copied: data before the page is skipped, and the walk over that bio's
 * segments stops as soon as the end of the page has been reached.
 */
static void copy_data(int frombio, struct bio *bio,
		      struct page *page,
		      sector_t sector)
{
	char *pa = page_address(page);
	struct bio_vec *bvl;
	int i;

	for (;bio && bio->bi_sector < sector+STRIPE_SECTORS;
	      bio = bio->bi_next) {
		/*
		 * Byte position in the page at which this bio begins;
		 * negative when the bio starts before the page does.
		 */
		int page_offset;
		if (bio->bi_sector >= sector)
			page_offset = (signed)(bio->bi_sector - sector) * 512;
		else
			page_offset = (signed)(sector - bio->bi_sector) * -512;
		bio_for_each_segment(bvl, bio, i) {
			int len = bio_iovec_idx(bio,i)->bv_len;
			int clen;
			int b_offset = 0;

			if (page_offset < 0) {
				/* skip the part of the segment before the page */
				b_offset = -page_offset;
				page_offset += b_offset;
				len -= b_offset;
			}

			/* clip the copy at the end of the page */
			if (len > 0 && page_offset + len > STRIPE_SIZE)
				clen = STRIPE_SIZE - page_offset;
			else clen = len;

			/*
			 * Test clen, not len: a negative clen must never
			 * reach memcpy(), where it would be converted to a
			 * huge unsigned length.  The segment is only mapped
			 * when there is something to copy.
			 */
			if (clen > 0) {
				char *ba = __bio_kmap(bio, i);
				if (frombio)
					memcpy(pa+page_offset, ba+b_offset, clen);
				else
					memcpy(ba+b_offset, pa+page_offset, clen);
				__bio_kunmap(bio, i);
			}
			/* end of page reached: the rest of this bio lies beyond it */
			if (clen < len)
				break;
			page_offset += len;
		}
	}
}
#define check_xor() do { \
if (count == MAX_XOR_BLOCKS) { \
xor_block(count, STRIPE_SIZE, ptr); \
count = 1; \
} \
} while(0) } while(0)
...@@ -660,88 +673,84 @@ static void compute_block(struct stripe_head *sh, int dd_idx) ...@@ -660,88 +673,84 @@ static void compute_block(struct stripe_head *sh, int dd_idx)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int i, count, disks = conf->raid_disks; int i, count, disks = conf->raid_disks;
struct buffer_head *bh_ptr[MAX_XOR_BLOCKS], *bh; void *ptr[MAX_XOR_BLOCKS], *p;
PRINTK("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx); PRINTK("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx);
ptr[0] = page_address(sh->dev[dd_idx].page);
memset(sh->bh_cache[dd_idx]->b_data, 0, sh->size); memset(ptr[0], 0, STRIPE_SIZE);
bh_ptr[0] = sh->bh_cache[dd_idx];
count = 1; count = 1;
for (i = disks ; i--; ) { for (i = disks ; i--; ) {
if (i == dd_idx) if (i == dd_idx)
continue; continue;
bh = sh->bh_cache[i]; p = page_address(sh->dev[i].page);
if (buffer_uptodate(bh)) if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
bh_ptr[count++] = bh; ptr[count++] = p;
else else
printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i); printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i);
check_xor(); check_xor();
} }
if (count != 1) if (count != 1)
xor_block(count, bh_ptr); xor_block(count, STRIPE_SIZE, ptr);
set_buffer_uptodate(sh->bh_cache[dd_idx]); set_bit(R5_UPTODATE, &sh->dev[i].flags);
} }
static void compute_parity(struct stripe_head *sh, int method) static void compute_parity(struct stripe_head *sh, int method)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count; int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count;
struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; void *ptr[MAX_XOR_BLOCKS];
struct buffer_head *chosen[MD_SB_DISKS]; struct bio *chosen[MD_SB_DISKS];
PRINTK("compute_parity, stripe %lu, method %d\n", sh->sector, method); PRINTK("compute_parity, stripe %lu, method %d\n", sh->sector, method);
memset(chosen, 0, sizeof(chosen)); memset(chosen, 0, sizeof(chosen));
count = 1; count = 1;
bh_ptr[0] = sh->bh_cache[pd_idx]; ptr[0] = page_address(sh->dev[pd_idx].page);
switch(method) { switch(method) {
case READ_MODIFY_WRITE: case READ_MODIFY_WRITE:
if (!buffer_uptodate(sh->bh_cache[pd_idx])) if (!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags))
BUG(); BUG();
for (i=disks ; i-- ;) { for (i=disks ; i-- ;) {
if (i==pd_idx) if (i==pd_idx)
continue; continue;
if (sh->bh_write[i] && if (sh->dev[i].towrite &&
buffer_uptodate(sh->bh_cache[i])) { test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
bh_ptr[count++] = sh->bh_cache[i]; ptr[count++] = page_address(sh->dev[i].page);
chosen[i] = sh->bh_write[i]; chosen[i] = sh->dev[i].towrite;
sh->bh_write[i] = sh->bh_write[i]->b_reqnext; sh->dev[i].towrite = NULL;
chosen[i]->b_reqnext = sh->bh_written[i]; if (sh->dev[i].written) BUG();
sh->bh_written[i] = chosen[i]; sh->dev[i].written = chosen[i];
check_xor(); check_xor();
} }
} }
break; break;
case RECONSTRUCT_WRITE: case RECONSTRUCT_WRITE:
memset(sh->bh_cache[pd_idx]->b_data, 0, sh->size); memset(ptr[0], 0, STRIPE_SIZE);
for (i= disks; i-- ;) for (i= disks; i-- ;)
if (i!=pd_idx && sh->bh_write[i]) { if (i!=pd_idx && sh->dev[i].towrite) {
chosen[i] = sh->bh_write[i]; chosen[i] = sh->dev[i].towrite;
sh->bh_write[i] = sh->bh_write[i]->b_reqnext; sh->dev[i].towrite = NULL;
chosen[i]->b_reqnext = sh->bh_written[i]; if (sh->dev[i].written) BUG();
sh->bh_written[i] = chosen[i]; sh->dev[i].written = chosen[i];
} }
break; break;
case CHECK_PARITY: case CHECK_PARITY:
break; break;
} }
if (count>1) { if (count>1) {
xor_block(count, bh_ptr); xor_block(count, STRIPE_SIZE, ptr);
count = 1; count = 1;
} }
for (i = disks; i--;) for (i = disks; i--;)
if (chosen[i]) { if (chosen[i]) {
struct buffer_head *bh = sh->bh_cache[i]; sector_t sector = sh->dev[i].sector;
char *bdata; copy_data(1, chosen[i], sh->dev[i].page, sector);
bdata = bh_kmap(chosen[i]);
memcpy(bh->b_data, set_bit(R5_LOCKED, &sh->dev[i].flags);
bdata,sh->size); set_bit(R5_UPTODATE, &sh->dev[i].flags);
bh_kunmap(chosen[i]);
set_buffer_locked(bh);
set_buffer_uptodate(bh);
} }
switch(method) { switch(method) {
...@@ -749,55 +758,74 @@ static void compute_parity(struct stripe_head *sh, int method) ...@@ -749,55 +758,74 @@ static void compute_parity(struct stripe_head *sh, int method)
case CHECK_PARITY: case CHECK_PARITY:
for (i=disks; i--;) for (i=disks; i--;)
if (i != pd_idx) { if (i != pd_idx) {
bh_ptr[count++] = sh->bh_cache[i]; ptr[count++] = page_address(sh->dev[i].page);
check_xor(); check_xor();
} }
break; break;
case READ_MODIFY_WRITE: case READ_MODIFY_WRITE:
for (i = disks; i--;) for (i = disks; i--;)
if (chosen[i]) { if (chosen[i]) {
bh_ptr[count++] = sh->bh_cache[i]; ptr[count++] = page_address(sh->dev[i].page);
check_xor(); check_xor();
} }
} }
if (count != 1) if (count != 1)
xor_block(count, bh_ptr); xor_block(count, STRIPE_SIZE, ptr);
if (method != CHECK_PARITY) { if (method != CHECK_PARITY) {
set_buffer_uptodate(sh->bh_cache[pd_idx]); set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
set_buffer_locked(sh->bh_cache[pd_idx]); set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
} else } else
clear_buffer_uptodate(sh->bh_cache[pd_idx]); clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
} }
static void add_stripe_bh (struct stripe_head *sh, struct buffer_head *bh, int dd_idx, int rw) /*
* Each stripe/dev can have one or more bion attached.
* toread/towrite point to the first in a chain.
* The bi_next chain must be in order.
*/
static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
{ {
struct buffer_head **bhp; struct bio **bip;
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
PRINTK("adding bh b#%lu to stripe s#%lu\n", bh->b_blocknr, sh->sector); PRINTK("adding bh b#%lu to stripe s#%lu\n", bi->bi_sector, sh->sector);
spin_lock(&sh->lock); spin_lock(&sh->lock);
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
bh->b_reqnext = NULL; if (forwrite)
if (rw == READ) bip = &sh->dev[dd_idx].towrite;
bhp = &sh->bh_read[dd_idx];
else else
bhp = &sh->bh_write[dd_idx]; bip = &sh->dev[dd_idx].toread;
while (*bhp) { while (*bip && (*bip)->bi_sector < bi->bi_sector)
printk(KERN_NOTICE "raid5: multiple %d requests for sector %ld\n", rw, sh->sector); bip = & (*bip)->bi_next;
bhp = & (*bhp)->b_reqnext; /* FIXME do I need to worry about overlapping bion */
} if (*bip && bi->bi_next && (*bip) != bi->bi_next)
*bhp = bh; BUG();
if (*bip)
bi->bi_next = *bip;
*bip = bi;
bi->bi_phys_segments ++;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
spin_unlock(&sh->lock); spin_unlock(&sh->lock);
PRINTK("added bh b#%lu to stripe s#%lu, disk %d.\n", bh->b_blocknr, sh->sector, dd_idx); if (forwrite) {
} /* check if page is coverred */
sector_t sector = sh->dev[dd_idx].sector;
for (bi=sh->dev[dd_idx].towrite;
sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
bi && bi->bi_sector <= sector;
bi = bi->bi_next) {
if (bi->bi_sector + (bi->bi_size>>9) >= sector)
sector = bi->bi_sector + (bi->bi_size>>9);
}
if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
}
PRINTK("added bi b#%lu to stripe s#%lu, disk %d.\n", bi->bi_sector, sh->sector, dd_idx);
}
/* /*
...@@ -822,13 +850,14 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -822,13 +850,14 @@ static void handle_stripe(struct stripe_head *sh)
{ {
raid5_conf_t *conf = sh->raid_conf; raid5_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks; int disks = conf->raid_disks;
struct buffer_head *return_ok= NULL, *return_fail = NULL; struct bio *return_bi= NULL;
struct bio *bi;
int action[MD_SB_DISKS]; int action[MD_SB_DISKS];
int i; int i;
int syncing; int syncing;
int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
int failed_num=0; int failed_num=0;
struct buffer_head *bh; struct r5dev *dev;
PRINTK("handling stripe %ld, cnt=%d, pd_idx=%d\n", sh->sector, atomic_read(&sh->count), sh->pd_idx); PRINTK("handling stripe %ld, cnt=%d, pd_idx=%d\n", sh->sector, atomic_read(&sh->count), sh->pd_idx);
memset(action, 0, sizeof(action)); memset(action, 0, sizeof(action));
...@@ -841,36 +870,38 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -841,36 +870,38 @@ static void handle_stripe(struct stripe_head *sh)
/* Now to look around and see what can be done */ /* Now to look around and see what can be done */
for (i=disks; i--; ) { for (i=disks; i--; ) {
bh = sh->bh_cache[i]; dev = &sh->dev[i];
PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, bh->b_state, sh->bh_read[i], sh->bh_write[i], sh->bh_written[i]); PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i,
dev->flags, dev->toread, dev->towrite, dev->written);
/* maybe we can reply to a read */ /* maybe we can reply to a read */
if (buffer_uptodate(bh) && sh->bh_read[i]) { if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
struct buffer_head *rbh, *rbh2; struct bio *rbi, *rbi2;
PRINTK("Return read for disc %d\n", i); PRINTK("Return read for disc %d\n", i);
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
rbh = sh->bh_read[i]; rbi = dev->toread;
sh->bh_read[i] = NULL; dev->toread = NULL;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
while (rbh) { while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
char *bdata; copy_data(0, rbi, dev->page, dev->sector);
bdata = bh_kmap(rbh); rbi2 = rbi->bi_next;
memcpy(bdata, bh->b_data, bh->b_size); spin_lock_irq(&conf->device_lock);
bh_kunmap(rbh); if (--rbi->bi_phys_segments == 0) {
rbh2 = rbh->b_reqnext; rbi->bi_next = return_bi;
rbh->b_reqnext = return_ok; return_bi = rbi;
return_ok = rbh; }
rbh = rbh2; spin_unlock_irq(&conf->device_lock);
rbi = rbi2;
} }
} }
/* now count some things */ /* now count some things */
if (buffer_locked(bh)) locked++; if (test_bit(R5_LOCKED, &dev->flags)) locked++;
if (buffer_uptodate(bh)) uptodate++; if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
if (sh->bh_read[i]) to_read++; if (dev->toread) to_read++;
if (sh->bh_write[i]) to_write++; if (dev->towrite) to_write++;
if (sh->bh_written[i]) written++; if (dev->written) written++;
if (!conf->disks[i].operational) { if (!conf->disks[i].operational) {
failed++; failed++;
failed_num = i; failed_num = i;
...@@ -882,29 +913,42 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -882,29 +913,42 @@ static void handle_stripe(struct stripe_head *sh)
* need to be failed * need to be failed
*/ */
if (failed > 1 && to_read+to_write) { if (failed > 1 && to_read+to_write) {
spin_lock_irq(&conf->device_lock);
for (i=disks; i--; ) { for (i=disks; i--; ) {
/* fail all writes first */ /* fail all writes first */
if (sh->bh_write[i]) to_write--; bi = sh->dev[i].towrite;
while ((bh = sh->bh_write[i])) { sh->dev[i].towrite = NULL;
sh->bh_write[i] = bh->b_reqnext; if (bi) to_write--;
bh->b_reqnext = return_fail;
return_fail = bh; while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
struct bio *nextbi = bi->bi_next;
clear_bit(BIO_UPTODATE, &bi->bi_flags);
if (--bi->bi_phys_segments == 0) {
bi->bi_next = return_bi;
return_bi = bi;
}
bi = nextbi;
} }
/* fail any reads if this device is non-operational */ /* fail any reads if this device is non-operational */
if (!conf->disks[i].operational) { if (!conf->disks[i].operational) {
spin_lock_irq(&conf->device_lock); bi = sh->dev[i].toread;
if (sh->bh_read[i]) to_read--; sh->dev[i].toread = NULL;
while ((bh = sh->bh_read[i])) { if (bi) to_read--;
sh->bh_read[i] = bh->b_reqnext; while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
bh->b_reqnext = return_fail; struct bio *nextbi = bi->bi_next;
return_fail = bh; clear_bit(BIO_UPTODATE, &bi->bi_flags);
if (--bi->bi_phys_segments == 0) {
bi->bi_next = return_bi;
return_bi = bi;
}
bi = nextbi;
} }
spin_unlock_irq(&conf->device_lock);
} }
} }
spin_unlock_irq(&conf->device_lock);
} }
if (failed > 1 && syncing) { if (failed > 1 && syncing) {
md_done_sync(conf->mddev, (sh->size>>9) - sh->sync_redone,0); md_done_sync(conf->mddev, STRIPE_SECTORS,0);
clear_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_SYNCING, &sh->state);
syncing = 0; syncing = 0;
} }
...@@ -912,40 +956,43 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -912,40 +956,43 @@ static void handle_stripe(struct stripe_head *sh)
/* might be able to return some write requests if the parity block /* might be able to return some write requests if the parity block
* is safe, or on a failed drive * is safe, or on a failed drive
*/ */
bh = sh->bh_cache[sh->pd_idx]; dev = &sh->dev[sh->pd_idx];
if ( written && if ( written &&
( (conf->disks[sh->pd_idx].operational && !buffer_locked(bh) && buffer_uptodate(bh)) ( (conf->disks[sh->pd_idx].operational && !test_bit(R5_LOCKED, &dev->flags) &&
test_bit(R5_UPTODATE, &dev->flags))
|| (failed == 1 && failed_num == sh->pd_idx)) || (failed == 1 && failed_num == sh->pd_idx))
) { ) {
/* any written block on a uptodate or failed drive can be returned */ /* any written block on an uptodate or failed drive can be returned */
for (i=disks; i--; ) for (i=disks; i--; )
if (sh->bh_written[i]) { if (sh->dev[i].written) {
bh = sh->bh_cache[i]; dev = &sh->dev[i];
if (!conf->disks[sh->pd_idx].operational || if (!conf->disks[sh->pd_idx].operational ||
(!buffer_locked(bh) && buffer_uptodate(bh)) ) { (!test_bit(R5_LOCKED, &dev->flags) && test_bit(R5_UPTODATE, &dev->flags)) ) {
/* maybe we can return some write requests */ /* maybe we can return some write requests */
struct buffer_head *wbh, *wbh2; struct bio *wbi, *wbi2;
PRINTK("Return write for disc %d\n", i); PRINTK("Return write for disc %d\n", i);
wbh = sh->bh_written[i]; wbi = dev->written;
sh->bh_written[i] = NULL; dev->written = NULL;
while (wbh) { while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
wbh2 = wbh->b_reqnext; wbi2 = wbi->bi_next;
wbh->b_reqnext = return_ok; if (--wbi->bi_phys_segments == 0) {
return_ok = wbh; wbi->bi_next = return_bi;
wbh = wbh2; return_bi = wbi;
} }
wbi = wbi2;
}
} }
} }
} }
/* Now we might consider reading some blocks, either to check/generate /* Now we might consider reading some blocks, either to check/generate
* parity, or to satisfy requests * parity, or to satisfy requests
*/ */
if (to_read || (syncing && (uptodate+failed < disks))) { if (to_read || (syncing && (uptodate+failed < disks))) {
for (i=disks; i--;) { for (i=disks; i--;) {
bh = sh->bh_cache[i]; dev = &sh->dev[i];
if (!buffer_locked(bh) && !buffer_uptodate(bh) && if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
(sh->bh_read[i] || syncing || (failed && sh->bh_read[failed_num]))) { (dev->toread || syncing || (failed && sh->dev[failed_num].toread))) {
/* we would like to get this block, possibly /* we would like to get this block, possibly
* by computing it, but we might not be able to * by computing it, but we might not be able to
*/ */
...@@ -954,21 +1001,21 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -954,21 +1001,21 @@ static void handle_stripe(struct stripe_head *sh)
compute_block(sh, i); compute_block(sh, i);
uptodate++; uptodate++;
} else if (conf->disks[i].operational) { } else if (conf->disks[i].operational) {
set_buffer_locked(bh); set_bit(R5_LOCKED, &dev->flags);
action[i] = READ+1; action[i] = READ+1;
#if 0
/* if I am just reading this block and we don't have /* if I am just reading this block and we don't have
a failed drive, or any pending writes then sidestep the cache */ a failed drive, or any pending writes then sidestep the cache */
if (sh->bh_page[i]) BUG();
if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext && if (sh->bh_read[i] && !sh->bh_read[i]->b_reqnext &&
! syncing && !failed && !to_write) { ! syncing && !failed && !to_write) {
sh->bh_page[i] = sh->bh_cache[i]->b_page;
sh->bh_cache[i]->b_page = sh->bh_read[i]->b_page; sh->bh_cache[i]->b_page = sh->bh_read[i]->b_page;
sh->bh_cache[i]->b_data = sh->bh_read[i]->b_data; sh->bh_cache[i]->b_data = sh->bh_read[i]->b_data;
} }
#endif
locked++; locked++;
PRINTK("Reading block %d (sync=%d)\n", i, syncing); PRINTK("Reading block %d (sync=%d)\n", i, syncing);
if (syncing) if (syncing)
md_sync_acct(conf->disks[i].dev, bh->b_size>>9); md_sync_acct(conf->disks[i].dev, STRIPE_SECTORS);
} }
} }
} }
...@@ -980,10 +1027,14 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -980,10 +1027,14 @@ static void handle_stripe(struct stripe_head *sh)
int rmw=0, rcw=0; int rmw=0, rcw=0;
for (i=disks ; i--;) { for (i=disks ; i--;) {
/* would I have to read this buffer for read_modify_write */ /* would I have to read this buffer for read_modify_write */
bh = sh->bh_cache[i]; dev = &sh->dev[i];
if ((sh->bh_write[i] || i == sh->pd_idx) && if ((dev->towrite || i == sh->pd_idx) &&
(!buffer_locked(bh) || sh->bh_page[i]) && (!test_bit(R5_LOCKED, &dev->flags)
!buffer_uptodate(bh)) { #if 0
|| sh->bh_page[i]!=bh->b_page
#endif
) &&
!test_bit(R5_UPTODATE, &dev->flags)) {
if (conf->disks[i].operational if (conf->disks[i].operational
/* && !(conf->resync_parity && i == sh->pd_idx) */ /* && !(conf->resync_parity && i == sh->pd_idx) */
) )
...@@ -991,9 +1042,13 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -991,9 +1042,13 @@ static void handle_stripe(struct stripe_head *sh)
else rmw += 2*disks; /* cannot read it */ else rmw += 2*disks; /* cannot read it */
} }
/* Would I have to read this buffer for reconstruct_write */ /* Would I have to read this buffer for reconstruct_write */
if (!sh->bh_write[i] && i != sh->pd_idx && if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
(!buffer_locked(bh) || sh->bh_page[i]) && (!test_bit(R5_LOCKED, &dev->flags)
!buffer_uptodate(bh)) { #if 0
|| sh->bh_page[i] != bh->b_page
#endif
) &&
!test_bit(R5_UPTODATE, &dev->flags)) {
if (conf->disks[i].operational) rcw++; if (conf->disks[i].operational) rcw++;
else rcw += 2*disks; else rcw += 2*disks;
} }
...@@ -1003,14 +1058,14 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1003,14 +1058,14 @@ static void handle_stripe(struct stripe_head *sh)
if (rmw < rcw && rmw > 0) if (rmw < rcw && rmw > 0)
/* prefer read-modify-write, but need to get some data */ /* prefer read-modify-write, but need to get some data */
for (i=disks; i--;) { for (i=disks; i--;) {
bh = sh->bh_cache[i]; dev = &sh->dev[i];
if ((sh->bh_write[i] || i == sh->pd_idx) && if ((dev->towrite || i == sh->pd_idx) &&
!buffer_locked(bh) && !buffer_uptodate(bh) && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
conf->disks[i].operational) { conf->disks[i].operational) {
if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
{ {
PRINTK("Read_old block %d for r-m-w\n", i); PRINTK("Read_old block %d for r-m-w\n", i);
set_buffer_locked(bh); set_bit(R5_LOCKED, &dev->flags);
action[i] = READ+1; action[i] = READ+1;
locked++; locked++;
} else { } else {
...@@ -1022,14 +1077,14 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1022,14 +1077,14 @@ static void handle_stripe(struct stripe_head *sh)
if (rcw <= rmw && rcw > 0) if (rcw <= rmw && rcw > 0)
/* want reconstruct write, but need to get some data */ /* want reconstruct write, but need to get some data */
for (i=disks; i--;) { for (i=disks; i--;) {
bh = sh->bh_cache[i]; dev = &sh->dev[i];
if (!sh->bh_write[i] && i != sh->pd_idx && if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
!buffer_locked(bh) && !buffer_uptodate(bh) && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
conf->disks[i].operational) { conf->disks[i].operational) {
if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
{ {
PRINTK("Read_old block %d for Reconstruct\n", i); PRINTK("Read_old block %d for Reconstruct\n", i);
set_buffer_locked(bh); set_bit(R5_LOCKED, &dev->flags);
action[i] = READ+1; action[i] = READ+1;
locked++; locked++;
} else { } else {
...@@ -1044,7 +1099,7 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1044,7 +1099,7 @@ static void handle_stripe(struct stripe_head *sh)
compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
/* now every locked buffer is ready to be written */ /* now every locked buffer is ready to be written */
for (i=disks; i--;) for (i=disks; i--;)
if (buffer_locked(sh->bh_cache[i])) { if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
PRINTK("Writing block %d\n", i); PRINTK("Writing block %d\n", i);
locked++; locked++;
action[i] = WRITE+1; action[i] = WRITE+1;
...@@ -1068,13 +1123,14 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1068,13 +1123,14 @@ static void handle_stripe(struct stripe_head *sh)
!test_bit(STRIPE_INSYNC, &sh->state) && failed <= 1) { !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 1) {
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
if (failed == 0) { if (failed == 0) {
char *pagea;
if (uptodate != disks) if (uptodate != disks)
BUG(); BUG();
compute_parity(sh, CHECK_PARITY); compute_parity(sh, CHECK_PARITY);
uptodate--; uptodate--;
bh = sh->bh_cache[sh->pd_idx]; pagea = page_address(sh->dev[sh->pd_idx].page);
if ((*(u32*)bh->b_data) == 0 && if ((*(u32*)pagea) == 0 &&
!memcmp(bh->b_data, bh->b_data+4, bh->b_size-4)) { !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) {
/* parity is correct (on disc, not in buffer any more) */ /* parity is correct (on disc, not in buffer any more) */
set_bit(STRIPE_INSYNC, &sh->state); set_bit(STRIPE_INSYNC, &sh->state);
} }
...@@ -1084,7 +1140,7 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1084,7 +1140,7 @@ static void handle_stripe(struct stripe_head *sh)
if (failed==0) if (failed==0)
failed_num = sh->pd_idx; failed_num = sh->pd_idx;
/* should be able to compute the missing block and write it to spare */ /* should be able to compute the missing block and write it to spare */
if (!buffer_uptodate(sh->bh_cache[failed_num])) { if (!test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)) {
if (uptodate+1 != disks) if (uptodate+1 != disks)
BUG(); BUG();
compute_block(sh, failed_num); compute_block(sh, failed_num);
...@@ -1092,60 +1148,62 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1092,60 +1148,62 @@ static void handle_stripe(struct stripe_head *sh)
} }
if (uptodate != disks) if (uptodate != disks)
BUG(); BUG();
bh = sh->bh_cache[failed_num]; dev = &sh->dev[failed_num];
set_buffer_locked(bh); set_bit(R5_LOCKED, &dev->flags);
action[failed_num] = WRITE+1; action[failed_num] = WRITE+1;
locked++; locked++;
set_bit(STRIPE_INSYNC, &sh->state); set_bit(STRIPE_INSYNC, &sh->state);
if (conf->disks[failed_num].operational) if (conf->disks[failed_num].operational)
md_sync_acct(conf->disks[failed_num].dev, bh->b_size>>9); md_sync_acct(conf->disks[failed_num].dev, STRIPE_SECTORS);
else if ((spare=conf->spare)) else if ((spare=conf->spare))
md_sync_acct(spare->dev, bh->b_size>>9); md_sync_acct(spare->dev, STRIPE_SECTORS);
} }
} }
if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
md_done_sync(conf->mddev, (sh->size>>9) - sh->sync_redone,1); md_done_sync(conf->mddev, STRIPE_SECTORS,1);
clear_bit(STRIPE_SYNCING, &sh->state); clear_bit(STRIPE_SYNCING, &sh->state);
} }
spin_unlock(&sh->lock); spin_unlock(&sh->lock);
while ((bh=return_ok)) { while ((bi=return_bi)) {
return_ok = bh->b_reqnext; return_bi = bi->bi_next;
bh->b_reqnext = NULL; bi->bi_next = NULL;
bh->b_end_io(bh, 1); bi->bi_end_io(bi);
}
while ((bh=return_fail)) {
return_fail = bh->b_reqnext;
bh->b_reqnext = NULL;
bh->b_end_io(bh, 0);
} }
for (i=disks; i-- ;) for (i=disks; i-- ;)
if (action[i]) { if (action[i]) {
struct buffer_head *bh = sh->bh_cache[i]; struct bio *bi = &sh->dev[i].req;
struct disk_info *spare = conf->spare; struct disk_info *spare = conf->spare;
int skip = 0; int skip = 0;
if (action[i] == READ+1) if (action[i] == READ+1)
bh->b_end_io = raid5_end_read_request; bi->bi_end_io = raid5_end_read_request;
else else
bh->b_end_io = raid5_end_write_request; bi->bi_end_io = raid5_end_write_request;
if (conf->disks[i].operational) if (conf->disks[i].operational)
bh->b_dev = conf->disks[i].dev; bi->bi_bdev = conf->disks[i].bdev;
else if (spare && action[i] == WRITE+1) else if (spare && action[i] == WRITE+1)
bh->b_dev = spare->dev; bi->bi_bdev = spare->bdev;
else skip=1; else skip=1;
/* FIXME - later we will need bdev here */
if (!skip) { if (!skip) {
PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i); PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i);
atomic_inc(&sh->count); atomic_inc(&sh->count);
bh->b_rdev = bh->b_dev; bi->bi_sector = sh->sector;
bh->b_rsector = bh->b_blocknr * (bh->b_size>>9); if (action[i] == READ+1)
generic_make_request(action[i]-1, bh); bi->bi_rw = 0;
else
bi->bi_rw = 1;
bi->bi_flags = 0;
bi->bi_vcnt = 1;
bi->bi_idx = 0;
bi->bi_io_vec = &sh->dev[i].vec;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
generic_make_request(bi);
} else { } else {
PRINTK("skip op %d on disc %d for sector %ld\n", action[i]-1, i, sh->sector); PRINTK("skip op %d on disc %d for sector %ld\n", action[i]-1, i, sh->sector);
clear_buffer_locked(bh); clear_bit(R5_LOCKED, &dev->flags);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
} }
} }
...@@ -1192,13 +1250,14 @@ static inline void raid5_plug_device(raid5_conf_t *conf) ...@@ -1192,13 +1250,14 @@ static inline void raid5_plug_device(raid5_conf_t *conf)
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
} }
static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh) static int make_request (mddev_t *mddev, int rw, struct bio * bi)
{ {
raid5_conf_t *conf = (raid5_conf_t *) mddev->private; raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
const unsigned int raid_disks = conf->raid_disks; const unsigned int raid_disks = conf->raid_disks;
const unsigned int data_disks = raid_disks - 1; const unsigned int data_disks = raid_disks - 1;
unsigned int dd_idx, pd_idx; unsigned int dd_idx, pd_idx;
unsigned long new_sector; sector_t new_sector;
sector_t logical_sector, last_sector;
int read_ahead = 0; int read_ahead = 0;
struct stripe_head *sh; struct stripe_head *sh;
...@@ -1208,25 +1267,39 @@ static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh) ...@@ -1208,25 +1267,39 @@ static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh)
read_ahead=1; read_ahead=1;
} }
new_sector = raid5_compute_sector(bh->b_rsector, logical_sector = bi->bi_sector & ~(STRIPE_SECTORS-1);
raid_disks, data_disks, &dd_idx, &pd_idx, conf); last_sector = bi->bi_sector + (bi->bi_size>>9);
PRINTK("raid5_make_request, sector %lu\n", new_sector); bi->bi_next = NULL;
sh = get_active_stripe(conf, new_sector, bh->b_size, read_ahead); set_bit(BIO_UPTODATE, &bi->bi_flags); /* will be cleared if error detected */
if (sh) { bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
sh->pd_idx = pd_idx; for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
new_sector = raid5_compute_sector(logical_sector,
raid_disks, data_disks, &dd_idx, &pd_idx, conf);
add_stripe_bh(sh, bh, dd_idx, rw); PRINTK("raid5: make_request, sector %ul logical %ul\n",
new_sector, logical_sector);
raid5_plug_device(conf); sh = get_active_stripe(conf, new_sector, pd_idx, read_ahead);
handle_stripe(sh); if (sh) {
release_stripe(sh);
} else add_stripe_bio(sh, bi, dd_idx, rw);
bh->b_end_io(bh, buffer_uptodate(bh));
raid5_plug_device(conf);
handle_stripe(sh);
release_stripe(sh);
}
}
spin_lock_irq(&conf->device_lock);
if (--bi->bi_phys_segments == 0)
bi->bi_end_io(bi);
spin_unlock_irq(&conf->device_lock);
return 0; return 0;
} }
static int raid5_sync_request (mddev_t *mddev, unsigned long sector_nr) /* FIXME go_faster isn't used */
static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster)
{ {
raid5_conf_t *conf = (raid5_conf_t *) mddev->private; raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
struct stripe_head *sh; struct stripe_head *sh;
...@@ -1237,25 +1310,19 @@ static int raid5_sync_request (mddev_t *mddev, unsigned long sector_nr) ...@@ -1237,25 +1310,19 @@ static int raid5_sync_request (mddev_t *mddev, unsigned long sector_nr)
unsigned long first_sector; unsigned long first_sector;
int raid_disks = conf->raid_disks; int raid_disks = conf->raid_disks;
int data_disks = raid_disks-1; int data_disks = raid_disks-1;
int redone = 0;
int bufsize;
sh = get_active_stripe(conf, sector_nr, 0, 0);
bufsize = sh->size;
redone = sector_nr - sh->sector;
first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk
+ chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf); + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
sh->pd_idx = pd_idx; sh = get_active_stripe(conf, sector_nr, pd_idx, 0);
spin_lock(&sh->lock); spin_lock(&sh->lock);
set_bit(STRIPE_SYNCING, &sh->state); set_bit(STRIPE_SYNCING, &sh->state);
clear_bit(STRIPE_INSYNC, &sh->state); clear_bit(STRIPE_INSYNC, &sh->state);
sh->sync_redone = redone;
spin_unlock(&sh->lock); spin_unlock(&sh->lock);
handle_stripe(sh); handle_stripe(sh);
release_stripe(sh); release_stripe(sh);
return (bufsize>>9)-redone; return STRIPE_SECTORS;
} }
/* /*
...@@ -1280,7 +1347,7 @@ static void raid5d (void *data) ...@@ -1280,7 +1347,7 @@ static void raid5d (void *data)
mddev->sb_dirty = 0; mddev->sb_dirty = 0;
md_update_sb(mddev); md_update_sb(mddev);
} }
md_spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
while (1) { while (1) {
struct list_head *first; struct list_head *first;
...@@ -1300,17 +1367,17 @@ static void raid5d (void *data) ...@@ -1300,17 +1367,17 @@ static void raid5d (void *data)
atomic_inc(&sh->count); atomic_inc(&sh->count);
if (atomic_read(&sh->count)!= 1) if (atomic_read(&sh->count)!= 1)
BUG(); BUG();
md_spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
handled++; handled++;
handle_stripe(sh); handle_stripe(sh);
release_stripe(sh); release_stripe(sh);
md_spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
} }
PRINTK("%d stripes handled\n", handled); PRINTK("%d stripes handled\n", handled);
md_spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
PRINTK("--- raid5d inactive\n"); PRINTK("--- raid5d inactive\n");
} }
...@@ -1340,7 +1407,7 @@ static void raid5syncd (void *data) ...@@ -1340,7 +1407,7 @@ static void raid5syncd (void *data)
printk("raid5: resync finished.\n"); printk("raid5: resync finished.\n");
} }
static int raid5_run (mddev_t *mddev) static int run (mddev_t *mddev)
{ {
raid5_conf_t *conf; raid5_conf_t *conf;
int i, j, raid_disk, memory; int i, j, raid_disk, memory;
...@@ -1348,7 +1415,7 @@ static int raid5_run (mddev_t *mddev) ...@@ -1348,7 +1415,7 @@ static int raid5_run (mddev_t *mddev)
mdp_disk_t *desc; mdp_disk_t *desc;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct disk_info *disk; struct disk_info *disk;
struct md_list_head *tmp; struct list_head *tmp;
int start_recovery = 0; int start_recovery = 0;
MOD_INC_USE_COUNT; MOD_INC_USE_COUNT;
...@@ -1365,25 +1432,24 @@ static int raid5_run (mddev_t *mddev) ...@@ -1365,25 +1432,24 @@ static int raid5_run (mddev_t *mddev)
memset (conf, 0, sizeof (*conf)); memset (conf, 0, sizeof (*conf));
conf->mddev = mddev; conf->mddev = mddev;
if ((conf->stripe_hashtbl = (struct stripe_head **) md__get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL) if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL)
goto abort; goto abort;
memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE); memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);
conf->device_lock = MD_SPIN_LOCK_UNLOCKED; conf->device_lock = SPIN_LOCK_UNLOCKED;
md_init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_stripe);
INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->handle_list);
INIT_LIST_HEAD(&conf->delayed_list); INIT_LIST_HEAD(&conf->delayed_list);
INIT_LIST_HEAD(&conf->inactive_list); INIT_LIST_HEAD(&conf->inactive_list);
atomic_set(&conf->active_stripes, 0); atomic_set(&conf->active_stripes, 0);
atomic_set(&conf->preread_active_stripes, 0); atomic_set(&conf->preread_active_stripes, 0);
conf->buffer_size = PAGE_SIZE; /* good default for rebuild */
conf->plugged = 0; conf->plugged = 0;
conf->plug_tq.sync = 0; conf->plug_tq.sync = 0;
conf->plug_tq.routine = &raid5_unplug_device; conf->plug_tq.routine = &raid5_unplug_device;
conf->plug_tq.data = conf; conf->plug_tq.data = conf;
PRINTK("raid5_run(md%d) called.\n", mdidx(mddev)); PRINTK("raid5: run(md%d) called.\n", mdidx(mddev));
ITERATE_RDEV(mddev,rdev,tmp) { ITERATE_RDEV(mddev,rdev,tmp) {
/* /*
...@@ -1404,6 +1470,7 @@ static int raid5_run (mddev_t *mddev) ...@@ -1404,6 +1470,7 @@ static int raid5_run (mddev_t *mddev)
disk->number = desc->number; disk->number = desc->number;
disk->raid_disk = raid_disk; disk->raid_disk = raid_disk;
disk->dev = rdev->dev; disk->dev = rdev->dev;
disk->bdev = rdev->bdev;
disk->operational = 0; disk->operational = 0;
disk->write_only = 0; disk->write_only = 0;
...@@ -1430,6 +1497,7 @@ static int raid5_run (mddev_t *mddev) ...@@ -1430,6 +1497,7 @@ static int raid5_run (mddev_t *mddev)
disk->number = desc->number; disk->number = desc->number;
disk->raid_disk = raid_disk; disk->raid_disk = raid_disk;
disk->dev = rdev->dev; disk->dev = rdev->dev;
disk->bdev = rdev->bdev;
disk->operational = 1; disk->operational = 1;
disk->used_slot = 1; disk->used_slot = 1;
...@@ -1442,6 +1510,7 @@ static int raid5_run (mddev_t *mddev) ...@@ -1442,6 +1510,7 @@ static int raid5_run (mddev_t *mddev)
disk->number = desc->number; disk->number = desc->number;
disk->raid_disk = raid_disk; disk->raid_disk = raid_disk;
disk->dev = rdev->dev; disk->dev = rdev->dev;
disk->bdev = rdev->bdev;
disk->operational = 0; disk->operational = 0;
disk->write_only = 0; disk->write_only = 0;
...@@ -1461,6 +1530,7 @@ static int raid5_run (mddev_t *mddev) ...@@ -1461,6 +1530,7 @@ static int raid5_run (mddev_t *mddev)
disk->number = desc->number; disk->number = desc->number;
disk->raid_disk = raid_disk; disk->raid_disk = raid_disk;
disk->dev = NODEV; disk->dev = NODEV;
disk->bdev = NULL;
disk->operational = 0; disk->operational = 0;
disk->write_only = 0; disk->write_only = 0;
...@@ -1518,9 +1588,9 @@ static int raid5_run (mddev_t *mddev) ...@@ -1518,9 +1588,9 @@ static int raid5_run (mddev_t *mddev)
memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
conf->raid_disks * ((sizeof(struct buffer_head) + PAGE_SIZE))) / 1024; conf->raid_disks * ((sizeof(struct buffer_head) + PAGE_SIZE))) / 1024;
if (grow_stripes(conf, conf->max_nr_stripes, GFP_KERNEL)) { if (grow_stripes(conf, conf->max_nr_stripes)) {
printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory); printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory);
shrink_stripes(conf, conf->max_nr_stripes); shrink_stripes(conf);
goto abort; goto abort;
} else } else
printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev)); printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev));
...@@ -1580,7 +1650,7 @@ static int raid5_run (mddev_t *mddev) ...@@ -1580,7 +1650,7 @@ static int raid5_run (mddev_t *mddev)
return -EIO; return -EIO;
} }
static int raid5_stop_resync (mddev_t *mddev) static int stop_resync (mddev_t *mddev)
{ {
raid5_conf_t *conf = mddev_to_conf(mddev); raid5_conf_t *conf = mddev_to_conf(mddev);
mdk_thread_t *thread = conf->resync_thread; mdk_thread_t *thread = conf->resync_thread;
...@@ -1597,7 +1667,7 @@ static int raid5_stop_resync (mddev_t *mddev) ...@@ -1597,7 +1667,7 @@ static int raid5_stop_resync (mddev_t *mddev)
return 0; return 0;
} }
static int raid5_restart_resync (mddev_t *mddev) static int restart_resync (mddev_t *mddev)
{ {
raid5_conf_t *conf = mddev_to_conf(mddev); raid5_conf_t *conf = mddev_to_conf(mddev);
...@@ -1616,14 +1686,14 @@ static int raid5_restart_resync (mddev_t *mddev) ...@@ -1616,14 +1686,14 @@ static int raid5_restart_resync (mddev_t *mddev)
} }
static int raid5_stop (mddev_t *mddev) static int stop (mddev_t *mddev)
{ {
raid5_conf_t *conf = (raid5_conf_t *) mddev->private; raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
if (conf->resync_thread) if (conf->resync_thread)
md_unregister_thread(conf->resync_thread); md_unregister_thread(conf->resync_thread);
md_unregister_thread(conf->thread); md_unregister_thread(conf->thread);
shrink_stripes(conf, conf->max_nr_stripes); shrink_stripes(conf);
free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
kfree(conf); kfree(conf);
mddev->private = NULL; mddev->private = NULL;
...@@ -1636,12 +1706,11 @@ static void print_sh (struct stripe_head *sh) ...@@ -1636,12 +1706,11 @@ static void print_sh (struct stripe_head *sh)
{ {
int i; int i;
printk("sh %lu, size %d, pd_idx %d, state %ld.\n", sh->sector, sh->size, sh->pd_idx, sh->state); printk("sh %lu, pd_idx %d, state %ld.\n", sh->sector, sh->pd_idx, sh->state);
printk("sh %lu, count %d.\n", sh->sector, atomic_read(&sh->count)); printk("sh %lu, count %d.\n", sh->sector, atomic_read(&sh->count));
printk("sh %lu, ", sh->sector); printk("sh %lu, ", sh->sector);
for (i = 0; i < MD_SB_DISKS; i++) { for (i = 0; i < sh->raid_conf->raid_disks; i++) {
if (sh->bh_cache[i]) printk("(cache%d: %p %ld) ", i, sh->dev[i].page, sh->dev[i].flags);
printk("(cache%d: %p %ld) ", i, sh->bh_cache[i], sh->bh_cache[i]->b_state);
} }
printk("\n"); printk("\n");
} }
...@@ -1651,7 +1720,7 @@ static void printall (raid5_conf_t *conf) ...@@ -1651,7 +1720,7 @@ static void printall (raid5_conf_t *conf)
struct stripe_head *sh; struct stripe_head *sh;
int i; int i;
md_spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
for (i = 0; i < NR_HASH; i++) { for (i = 0; i < NR_HASH; i++) {
sh = conf->stripe_hashtbl[i]; sh = conf->stripe_hashtbl[i];
for (; sh; sh = sh->hash_next) { for (; sh; sh = sh->hash_next) {
...@@ -1660,13 +1729,13 @@ static void printall (raid5_conf_t *conf) ...@@ -1660,13 +1729,13 @@ static void printall (raid5_conf_t *conf)
print_sh(sh); print_sh(sh);
} }
} }
md_spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
PRINTK("--- raid5d inactive\n"); PRINTK("--- raid5d inactive\n");
} }
#endif #endif
static int raid5_status (char *page, mddev_t *mddev) static int status (char *page, mddev_t *mddev)
{ {
raid5_conf_t *conf = (raid5_conf_t *) mddev->private; raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
mdp_super_t *sb = mddev->sb; mdp_super_t *sb = mddev->sb;
...@@ -1711,7 +1780,7 @@ static void print_raid5_conf (raid5_conf_t *conf) ...@@ -1711,7 +1780,7 @@ static void print_raid5_conf (raid5_conf_t *conf)
} }
} }
static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
{ {
int err = 0; int err = 0;
int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1; int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1;
...@@ -1722,7 +1791,7 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -1722,7 +1791,7 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
mdk_rdev_t *spare_rdev, *failed_rdev; mdk_rdev_t *spare_rdev, *failed_rdev;
print_raid5_conf(conf); print_raid5_conf(conf);
md_spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
/* /*
* find the disk ... * find the disk ...
*/ */
...@@ -1948,6 +2017,7 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -1948,6 +2017,7 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
goto abort; goto abort;
} }
rdisk->dev = NODEV; rdisk->dev = NODEV;
rdisk->bdev = NULL;
rdisk->used_slot = 0; rdisk->used_slot = 0;
break; break;
...@@ -1965,6 +2035,8 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -1965,6 +2035,8 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
adisk->number = added_desc->number; adisk->number = added_desc->number;
adisk->raid_disk = added_desc->raid_disk; adisk->raid_disk = added_desc->raid_disk;
adisk->dev = mk_kdev(added_desc->major,added_desc->minor); adisk->dev = mk_kdev(added_desc->major,added_desc->minor);
/* it will be held open by rdev */
adisk->bdev = bdget(kdev_t_to_nr(adisk->dev));
adisk->operational = 0; adisk->operational = 0;
adisk->write_only = 0; adisk->write_only = 0;
...@@ -1980,7 +2052,7 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -1980,7 +2052,7 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
goto abort; goto abort;
} }
abort: abort:
md_spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
print_raid5_conf(conf); print_raid5_conf(conf);
return err; return err;
} }
...@@ -1988,18 +2060,18 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -1988,18 +2060,18 @@ static int raid5_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
static mdk_personality_t raid5_personality= static mdk_personality_t raid5_personality=
{ {
name: "raid5", name: "raid5",
make_request: raid5_make_request, make_request: make_request,
run: raid5_run, run: run,
stop: raid5_stop, stop: stop,
status: raid5_status, status: status,
error_handler: raid5_error, error_handler: error,
diskop: raid5_diskop, diskop: diskop,
stop_resync: raid5_stop_resync, stop_resync: stop_resync,
restart_resync: raid5_restart_resync, restart_resync: restart_resync,
sync_request: raid5_sync_request sync_request: sync_request
}; };
/*
 * Register raid5_personality under the RAID5 id.
 *
 * Returns the value of register_md_personality() unchanged.
 */
static int __init raid5_init (void)
{
	return register_md_personality (RAID5, &raid5_personality);
}
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#define BH_TRACE 0 #define BH_TRACE 0
#include <linux/module.h> #include <linux/module.h>
#include <linux/raid/md.h> #include <linux/raid/md.h>
#include <linux/raid/md_compatible.h>
#include <linux/raid/xor.h> #include <linux/raid/xor.h>
#include <asm/xor.h> #include <asm/xor.h>
...@@ -27,31 +26,30 @@ ...@@ -27,31 +26,30 @@
static struct xor_block_template *active_template; static struct xor_block_template *active_template;
void void
xor_block(unsigned int count, struct buffer_head **bh_ptr) xor_block(unsigned int count, unsigned int bytes, void **ptr)
{ {
unsigned long *p0, *p1, *p2, *p3, *p4; unsigned long *p0, *p1, *p2, *p3, *p4;
unsigned long bytes = bh_ptr[0]->b_size;
p0 = (unsigned long *) bh_ptr[0]->b_data; p0 = (unsigned long *) ptr[0];
p1 = (unsigned long *) bh_ptr[1]->b_data; p1 = (unsigned long *) ptr[1];
if (count == 2) { if (count == 2) {
active_template->do_2(bytes, p0, p1); active_template->do_2(bytes, p0, p1);
return; return;
} }
p2 = (unsigned long *) bh_ptr[2]->b_data; p2 = (unsigned long *) ptr[2];
if (count == 3) { if (count == 3) {
active_template->do_3(bytes, p0, p1, p2); active_template->do_3(bytes, p0, p1, p2);
return; return;
} }
p3 = (unsigned long *) bh_ptr[3]->b_data; p3 = (unsigned long *) ptr[3];
if (count == 4) { if (count == 4) {
active_template->do_4(bytes, p0, p1, p2, p3); active_template->do_4(bytes, p0, p1, p2, p3);
return; return;
} }
p4 = (unsigned long *) bh_ptr[4]->b_data; p4 = (unsigned long *) ptr[4];
active_template->do_5(bytes, p0, p1, p2, p3, p4); active_template->do_5(bytes, p0, p1, p2, p3, p4);
} }
...@@ -103,7 +101,7 @@ calibrate_xor_block(void) ...@@ -103,7 +101,7 @@ calibrate_xor_block(void)
void *b1, *b2; void *b1, *b2;
struct xor_block_template *f, *fastest; struct xor_block_template *f, *fastest;
b1 = (void *) md__get_free_pages(GFP_KERNEL, 2); b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
if (! b1) { if (! b1) {
printk("raid5: Yikes! No memory available.\n"); printk("raid5: Yikes! No memory available.\n");
return -ENOMEM; return -ENOMEM;
...@@ -137,7 +135,7 @@ calibrate_xor_block(void) ...@@ -137,7 +135,7 @@ calibrate_xor_block(void)
return 0; return 0;
} }
MD_EXPORT_SYMBOL(xor_block); EXPORT_SYMBOL(xor_block);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
module_init(calibrate_xor_block); module_init(calibrate_xor_block);
...@@ -105,9 +105,8 @@ int presto_set_ext_attr(struct inode *inode, ...@@ -105,9 +105,8 @@ int presto_set_ext_attr(struct inode *inode,
printk("InterMezzo: out of memory!!!\n"); printk("InterMezzo: out of memory!!!\n");
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(buf, buffer, buffer_len); if (copy_from_user(buf, buffer, buffer_len))
if (error) return -EFAULT;
return error;
} else } else
buf = buffer; buf = buffer;
} else } else
......
...@@ -31,10 +31,9 @@ int begin_kml_reint (struct file *file, unsigned long arg) ...@@ -31,10 +31,9 @@ int begin_kml_reint (struct file *file, unsigned long arg)
ENTRY; ENTRY;
/* allocate buffer & copy it to kernel space */ /* allocate buffer & copy it to kernel space */
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
if (input.reclen > kml_fsdata->kml_maxsize) if (input.reclen > kml_fsdata->kml_maxsize)
...@@ -45,11 +44,10 @@ int begin_kml_reint (struct file *file, unsigned long arg) ...@@ -45,11 +44,10 @@ int begin_kml_reint (struct file *file, unsigned long arg)
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.volname, input.namelen); if (copy_from_user(path, input.volname, input.namelen)) {
if ( error ) {
PRESTO_FREE(path, input.namelen + 1); PRESTO_FREE(path, input.namelen + 1);
EXIT; EXIT;
return error; return -EFAULT;
} }
path[input.namelen] = '\0'; path[input.namelen] = '\0';
fset = kml_getfset (path); fset = kml_getfset (path);
...@@ -57,10 +55,9 @@ int begin_kml_reint (struct file *file, unsigned long arg) ...@@ -57,10 +55,9 @@ int begin_kml_reint (struct file *file, unsigned long arg)
kml_fsdata = FSET_GET_KMLDATA(fset); kml_fsdata = FSET_GET_KMLDATA(fset);
/* read the buf from user memory here */ /* read the buf from user memory here */
error = copy_from_user(kml_fsdata->kml_buf, input.recbuf, input.reclen); if (copy_from_user(kml_fsdata->kml_buf, input.recbuf, input.reclen)) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
kml_fsdata->kml_len = input.reclen; kml_fsdata->kml_len = input.reclen;
...@@ -94,21 +91,19 @@ int do_kml_reint (struct file *file, unsigned long arg) ...@@ -94,21 +91,19 @@ int do_kml_reint (struct file *file, unsigned long arg)
struct presto_file_set *fset; struct presto_file_set *fset;
ENTRY; ENTRY;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
PRESTO_ALLOC(path, char *, input.namelen + 1); PRESTO_ALLOC(path, char *, input.namelen + 1);
if ( !path ) { if ( !path ) {
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.volname, input.namelen); if (copy_from_user(path, input.volname, input.namelen)) {
if ( error ) {
PRESTO_FREE(path, input.namelen + 1); PRESTO_FREE(path, input.namelen + 1);
EXIT; EXIT;
return error; return -EFAULT;
} }
path[input.namelen] = '\0'; path[input.namelen] = '\0';
fset = kml_getfset (path); fset = kml_getfset (path);
...@@ -138,7 +133,8 @@ int do_kml_reint (struct file *file, unsigned long arg) ...@@ -138,7 +133,8 @@ int do_kml_reint (struct file *file, unsigned long arg)
strlen (close->path) + 1, input.pathlen); strlen (close->path) + 1, input.pathlen);
error = -ENOMEM; error = -ENOMEM;
} }
copy_to_user((char *)arg, &input, sizeof (input)); if (copy_to_user((char *)arg, &input, sizeof (input)))
return -EFAULT;
} }
return error; return error;
} }
...@@ -161,10 +157,9 @@ int end_kml_reint (struct file *file, unsigned long arg) ...@@ -161,10 +157,9 @@ int end_kml_reint (struct file *file, unsigned long arg)
char *path; char *path;
ENTRY; ENTRY;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
PRESTO_ALLOC(path, char *, input.namelen + 1); PRESTO_ALLOC(path, char *, input.namelen + 1);
...@@ -172,11 +167,11 @@ int end_kml_reint (struct file *file, unsigned long arg) ...@@ -172,11 +167,11 @@ int end_kml_reint (struct file *file, unsigned long arg)
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.volname, input.namelen); if (copy_from_user(path, input.volname, input.namelen)) {
if ( error ) { if ( error ) {
PRESTO_FREE(path, input.namelen + 1); PRESTO_FREE(path, input.namelen + 1);
EXIT; EXIT;
return error; return -EFAULT;
} }
path[input.namelen] = '\0'; path[input.namelen] = '\0';
fset = kml_getfset (path); fset = kml_getfset (path);
...@@ -193,7 +188,8 @@ int end_kml_reint (struct file *file, unsigned long arg) ...@@ -193,7 +188,8 @@ int end_kml_reint (struct file *file, unsigned long arg)
#if 0 #if 0
input.newpos = kml_upc->newpos; input.newpos = kml_upc->newpos;
input.count = kml_upc->count; input.count = kml_upc->count;
copy_to_user((char *)arg, &input, sizeof (input)); if (copy_to_user((char *)arg, &input, sizeof (input)))
return -EFAULT;
#endif #endif
return error; return error;
} }
...@@ -149,9 +149,8 @@ static ssize_t presto_psdev_write(struct file *file, const char *buf, ...@@ -149,9 +149,8 @@ static ssize_t presto_psdev_write(struct file *file, const char *buf,
return -EINVAL; return -EINVAL;
} }
error = copy_from_user(&hdr, buf, sizeof(hdr)); if (copy_from_user(&hdr, buf, sizeof(hdr)))
if ( error ) return -EFAULT;
return error;
CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n", CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d)\n",
current->pid, hdr.opcode, hdr.unique); current->pid, hdr.opcode, hdr.unique);
...@@ -183,9 +182,8 @@ static ssize_t presto_psdev_write(struct file *file, const char *buf, ...@@ -183,9 +182,8 @@ static ssize_t presto_psdev_write(struct file *file, const char *buf,
req->rq_bufsize, count, hdr.opcode, hdr.unique); req->rq_bufsize, count, hdr.opcode, hdr.unique);
count = req->rq_bufsize; /* don't have more space! */ count = req->rq_bufsize; /* don't have more space! */
} }
error = copy_from_user(req->rq_data, buf, count); if (copy_from_user(req->rq_data, buf, count))
if ( error ) return -EFAULT;
return error;
/* adjust outsize: good upcalls can be aware of this */ /* adjust outsize: good upcalls can be aware of this */
req->rq_rep_size = count; req->rq_rep_size = count;
...@@ -280,14 +278,12 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -280,14 +278,12 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
char * tmp; char * tmp;
int error; int error;
error = copy_from_user(&readmount, (void *)arg, if (copy_from_user(&readmount, (void *)arg, sizeof(readmount)))
sizeof(readmount));
if ( error ) {
printk("psdev: can't copy %Zd bytes from %p to %p\n", printk("psdev: can't copy %Zd bytes from %p to %p\n",
sizeof(readmount), (struct readmount *) arg, sizeof(readmount), (struct readmount *) arg,
&readmount); &readmount);
EXIT; EXIT;
return error; return -EFAULT;
} }
len = readmount.io_len; len = readmount.io_len;
...@@ -307,15 +303,16 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -307,15 +303,16 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
* I mean, let's let the compiler do a little work ... * I mean, let's let the compiler do a little work ...
* gcc suggested the extra () * gcc suggested the extra ()
*/ */
error = copy_to_user(readmount.io_string, tmp, outlen); if (copy_to_user(readmount.io_string, tmp, outlen)) {
if ( error ) {
CDEBUG(D_PSDEV, "Copy_to_user string 0x%p failed\n", CDEBUG(D_PSDEV, "Copy_to_user string 0x%p failed\n",
readmount.io_string); readmount.io_string);
error = -EFAULT;
} }
if ((!error) && (error = copy_to_user(&(user_readmount->io_len), if (!error && copy_to_user(&(user_readmount->io_len),
&outlen, sizeof(int))) ) { &outlen, sizeof(int))) {
CDEBUG(D_PSDEV, "Copy_to_user len @0x%p failed\n", CDEBUG(D_PSDEV, "Copy_to_user len @0x%p failed\n",
&(user_readmount->io_len)); &(user_readmount->io_len));
error = -EFAULT;
} }
PRESTO_FREE(tmp, len); PRESTO_FREE(tmp, len);
...@@ -360,10 +357,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -360,10 +357,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int path_len; int path_len;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
PRESTO_ALLOC(path, char *, input.path_len + 1); PRESTO_ALLOC(path, char *, input.path_len + 1);
...@@ -371,11 +367,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -371,11 +367,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.path, input.path_len); if (copy_from_user(path, input.path, input.path_len)) {
if ( error ) {
PRESTO_FREE(path, input.path_len + 1); PRESTO_FREE(path, input.path_len + 1);
EXIT; EXIT;
return error; return -EFAULT;
} }
path[input.path_len] = '\0'; path[input.path_len] = '\0';
CDEBUG(D_PSDEV, "clear_fsetroot: path %s\n", path); CDEBUG(D_PSDEV, "clear_fsetroot: path %s\n", path);
...@@ -401,10 +396,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -401,10 +396,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int path_len; int path_len;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
PRESTO_ALLOC(path, char *, input.path_len + 1); PRESTO_ALLOC(path, char *, input.path_len + 1);
...@@ -412,11 +406,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -412,11 +406,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.path, input.path_len); if (copy_from_user(path, input.path, input.path_len)) {
if ( error ) {
PRESTO_FREE(path, input.path_len + 1); PRESTO_FREE(path, input.path_len + 1);
EXIT; EXIT;
return error; return -EFAULT;
} }
path[input.path_len] = '\0'; path[input.path_len] = '\0';
CDEBUG(D_PSDEV, "clear_all_fsetroot: path %s\n", path); CDEBUG(D_PSDEV, "clear_all_fsetroot: path %s\n", path);
...@@ -440,10 +433,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -440,10 +433,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int path_len; int path_len;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
PRESTO_ALLOC(path, char *, input.path_len + 1); PRESTO_ALLOC(path, char *, input.path_len + 1);
...@@ -451,11 +443,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -451,11 +443,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.path, input.path_len); if (copy_from_user(path, input.path, input.path_len)) {
if ( error ) {
PRESTO_FREE(path, input.path_len + 1); PRESTO_FREE(path, input.path_len + 1);
EXIT; EXIT;
return error; return -EFAULT;
} }
path[input.path_len] = '\0'; path[input.path_len] = '\0';
CDEBUG(D_PSDEV, "get_kmlsize: len %d path %s\n", CDEBUG(D_PSDEV, "get_kmlsize: len %d path %s\n",
...@@ -474,7 +465,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -474,7 +465,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
CDEBUG(D_PSDEV, "get_kmlsize: size = %Zd\n", size); CDEBUG(D_PSDEV, "get_kmlsize: size = %Zd\n", size);
EXIT; EXIT;
return copy_to_user((char *)arg, &input, sizeof(input)); if (copy_to_user((char *)arg, &input, sizeof(input)))
return -EFAULT;
return 0;
} }
case PRESTO_GET_RECNO: { case PRESTO_GET_RECNO: {
...@@ -488,10 +481,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -488,10 +481,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int path_len; int path_len;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
PRESTO_ALLOC(path, char *, input.path_len + 1); PRESTO_ALLOC(path, char *, input.path_len + 1);
...@@ -499,11 +491,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -499,11 +491,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.path, input.path_len); if (copy_from_user(path, input.path, input.path_len)) {
if ( error ) {
PRESTO_FREE(path, input.path_len + 1); PRESTO_FREE(path, input.path_len + 1);
EXIT; EXIT;
return error; return -EFAULT;
} }
path[input.path_len] = '\0'; path[input.path_len] = '\0';
CDEBUG(D_PSDEV, "get_recno: len %d path %s\n", CDEBUG(D_PSDEV, "get_recno: len %d path %s\n",
...@@ -522,7 +513,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -522,7 +513,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
CDEBUG(D_PSDEV, "get_recno: recno = %d\n", (int) recno); CDEBUG(D_PSDEV, "get_recno: recno = %d\n", (int) recno);
EXIT; EXIT;
return copy_to_user((char *)arg, &input, sizeof(input)); if (copy_to_user((char *)arg, &input, sizeof(input)))
return -EFAULT;
return 0;
} }
case PRESTO_SET_FSETROOT: { case PRESTO_SET_FSETROOT: {
...@@ -543,10 +536,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -543,10 +536,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int flags; int flags;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
PRESTO_ALLOC(path, char *, input.path_len + 1); PRESTO_ALLOC(path, char *, input.path_len + 1);
...@@ -554,9 +546,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -554,9 +546,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(path, input.path, input.path_len); if (copy_from_user(path, input.path, input.path_len)) {
if ( error ) {
EXIT; EXIT;
error -EFAULT;
goto exit_free_path; goto exit_free_path;
} }
path[input.path_len] = '\0'; path[input.path_len] = '\0';
...@@ -567,9 +559,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -567,9 +559,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
goto exit_free_path; goto exit_free_path;
} }
error = copy_from_user(fsetname, input.name, input.name_len); if (copy_from_user(fsetname, input.name, input.name_len)) {
if ( error ) {
EXIT; EXIT;
error = -EFAULT;
goto exit_free_fsetname; goto exit_free_fsetname;
} }
fsetname[input.name_len] = '\0'; fsetname[input.name_len] = '\0';
...@@ -621,12 +613,11 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -621,12 +613,11 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
struct psdev_opt *user_opt = (struct psdev_opt *) arg; struct psdev_opt *user_opt = (struct psdev_opt *) arg;
int error; int error;
error = copy_from_user(&kopt, (void *)arg, sizeof(kopt)); if (copy_from_user(&kopt, (void *)arg, sizeof(kopt))) {
if ( error ) {
printk("psdev: can't copyin %Zd bytes from %p to %p\n", printk("psdev: can't copyin %Zd bytes from %p to %p\n",
sizeof(kopt), (struct kopt *) arg, &kopt); sizeof(kopt), (struct kopt *) arg, &kopt);
EXIT; EXIT;
return error; return -EFAULT;
} }
minor = minor(dev); minor = minor(dev);
if (cmd == PRESTO_SETOPT) if (cmd == PRESTO_SETOPT)
...@@ -650,12 +641,11 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -650,12 +641,11 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
return error; return error;
} }
error = copy_to_user(user_opt, &kopt, sizeof(kopt)); if (copy_to_user(user_opt, &kopt, sizeof(kopt))) {
if ( error ) {
CDEBUG(D_PSDEV, "Copy_to_user opt 0x%p failed\n", CDEBUG(D_PSDEV, "Copy_to_user opt 0x%p failed\n",
user_opt); user_opt);
EXIT; EXIT;
return error; return -EFAULT;
} }
CDEBUG(D_PSDEV, "dosetopt minor %d, opt %d, val %d return %d\n", CDEBUG(D_PSDEV, "dosetopt minor %d, opt %d, val %d return %d\n",
minor, kopt.optname, kopt.optval, error); minor, kopt.optname, kopt.optval, error);
...@@ -668,10 +658,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -668,10 +658,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
struct lento_input_attr input; struct lento_input_attr input;
struct iattr iattr; struct iattr iattr;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
iattr.ia_valid = input.valid; iattr.ia_valid = input.valid;
iattr.ia_mode = (umode_t)input.mode; iattr.ia_mode = (umode_t)input.mode;
...@@ -692,10 +681,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -692,10 +681,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input_mode input; struct lento_input_mode input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_create(input.name, input.mode, &input.info); error = lento_create(input.name, input.mode, &input.info);
...@@ -707,10 +695,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -707,10 +695,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input_old_new input; struct lento_input_old_new input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_link(input.oldname, input.newname, &input.info); error = lento_link(input.oldname, input.newname, &input.info);
...@@ -722,10 +709,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -722,10 +709,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input input; struct lento_input input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_unlink(input.name, &input.info); error = lento_unlink(input.name, &input.info);
...@@ -737,10 +723,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -737,10 +723,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input_old_new input; struct lento_input_old_new input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_symlink(input.oldname, input.newname,&input.info); error = lento_symlink(input.oldname, input.newname,&input.info);
...@@ -752,10 +737,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -752,10 +737,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input_mode input; struct lento_input_mode input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_mkdir(input.name, input.mode, &input.info); error = lento_mkdir(input.name, input.mode, &input.info);
...@@ -767,10 +751,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -767,10 +751,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input input; struct lento_input input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_rmdir(input.name, &input.info); error = lento_rmdir(input.name, &input.info);
...@@ -782,10 +765,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -782,10 +765,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input_dev input; struct lento_input_dev input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_mknod(input.name, input.mode, error = lento_mknod(input.name, input.mode,
...@@ -798,10 +780,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -798,10 +780,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
int error; int error;
struct lento_input_old_new input; struct lento_input_old_new input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
error = lento_rename(input.oldname, input.newname, &input.info); error = lento_rename(input.oldname, input.newname, &input.info);
...@@ -817,30 +798,27 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -817,30 +798,27 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
char *name; char *name;
char *buffer; char *buffer;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
/* Now setup the input parameters */ /* Now setup the input parameters */
PRESTO_ALLOC(name, char *, input.name_len+1); PRESTO_ALLOC(name, char *, input.name_len+1);
/* We need null terminated strings for attr names */ /* We need null terminated strings for attr names */
name[input.name_len] = '\0'; name[input.name_len] = '\0';
error=copy_from_user(name, input.name, input.name_len); if (copy_from_user(name, input.name, input.name_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(name,input.name_len+1); PRESTO_FREE(name,input.name_len+1);
return error; return -EFAULT;
} }
PRESTO_ALLOC(buffer, char *, input.buffer_len+1); PRESTO_ALLOC(buffer, char *, input.buffer_len+1);
error=copy_from_user(buffer, input.buffer, input.buffer_len); if (copy_from_user(buffer, input.buffer, input.buffer_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(name,input.name_len+1); PRESTO_FREE(name,input.name_len+1);
PRESTO_FREE(buffer,input.buffer_len+1); PRESTO_FREE(buffer,input.buffer_len+1);
return error; return -EFAULT;
} }
/* Make null terminated for easy printing */ /* Make null terminated for easy printing */
buffer[input.buffer_len]='\0'; buffer[input.buffer_len]='\0';
...@@ -869,21 +847,19 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -869,21 +847,19 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
struct lento_input_ext_attr input; struct lento_input_ext_attr input;
char *name; char *name;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
/* Now setup the input parameters */ /* Now setup the input parameters */
PRESTO_ALLOC(name, char *, input.name_len+1); PRESTO_ALLOC(name, char *, input.name_len+1);
/* We need null terminated strings for attr names */ /* We need null terminated strings for attr names */
name[input.name_len] = '\0'; name[input.name_len] = '\0';
error=copy_from_user(name, input.name, input.name_len); if (copy_from_user(name, input.name, input.name_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(name,input.name_len+1); PRESTO_FREE(name,input.name_len+1);
return error; return -EFAULT;
} }
CDEBUG(D_PSDEV," delextattr params: name %s," CDEBUG(D_PSDEV," delextattr params: name %s,"
...@@ -907,10 +883,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -907,10 +883,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
struct lento_input_iopen input; struct lento_input_iopen input;
int error; int error;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
input.fd = lento_iopen(input.name, (ino_t)input.ino, input.fd = lento_iopen(input.name, (ino_t)input.ino,
...@@ -921,17 +896,18 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -921,17 +896,18 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
return input.fd; return input.fd;
} }
EXIT; EXIT;
return copy_to_user((char *)arg, &input, sizeof(input)); if (copy_to_user((char *)arg, &input, sizeof(input)))
return -EFAULT;
return 0;
} }
case PRESTO_VFS_CLOSE: { case PRESTO_VFS_CLOSE: {
int error; int error;
struct lento_input_close input; struct lento_input_close input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
CDEBUG(D_PIOCTL, "lento_close file descriptor: %d\n", input.fd); CDEBUG(D_PIOCTL, "lento_close file descriptor: %d\n", input.fd);
...@@ -952,10 +928,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -952,10 +928,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
struct presto_version remote_file_version; struct presto_version remote_file_version;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
user_path = input.path; user_path = input.path;
...@@ -964,11 +939,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -964,11 +939,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(input.path, user_path, input.path_len); if (copy_from_user(input.path, user_path, input.path_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(input.path, input.path_len + 1); PRESTO_FREE(input.path, input.path_len + 1);
return error; return -EFAULT;
} }
input.path[input.path_len] = '\0'; input.path[input.path_len] = '\0';
...@@ -996,10 +970,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -996,10 +970,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
struct lento_vfs_context info; struct lento_vfs_context info;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
user_path = input.path; user_path = input.path;
...@@ -1008,11 +981,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1008,11 +981,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(input.path, user_path, input.path_len); if (copy_from_user(input.path, user_path, input.path_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(input.path, input.path_len + 1); PRESTO_FREE(input.path, input.path_len + 1);
return error; return -EFAULT;
} }
input.path[input.path_len] = '\0'; input.path[input.path_len] = '\0';
...@@ -1035,10 +1007,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1035,10 +1007,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
__u32 path_len; __u32 path_len;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
user_path = input.path; user_path = input.path;
...@@ -1047,11 +1018,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1047,11 +1018,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(input.path, user_path, input.path_len); if (copy_from_user(input.path, user_path, input.path_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(input.path, input.path_len + 1); PRESTO_FREE(input.path, input.path_len + 1);
return error; return -EFAULT;
} }
input.path[input.path_len] = '\0'; input.path[input.path_len] = '\0';
...@@ -1072,10 +1042,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1072,10 +1042,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
__u32 recno; __u32 recno;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
user_path = input.path; user_path = input.path;
...@@ -1084,11 +1053,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1084,11 +1053,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(input.path, user_path, input.path_len); if (copy_from_user(input.path, user_path, input.path_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(input.path, input.path_len + 1); PRESTO_FREE(input.path, input.path_len + 1);
return error; return -EFAULT;
} }
input.path[input.path_len] = '\0'; input.path[input.path_len] = '\0';
...@@ -1111,10 +1079,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1111,10 +1079,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
char *path; char *path;
} input; } input;
error = copy_from_user(&input, (char *)arg, sizeof(input)); if (copy_from_user(&input, (char *)arg, sizeof(input))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
user_path = input.path; user_path = input.path;
...@@ -1123,11 +1090,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1123,11 +1090,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(input.path, user_path, input.path_len); if (copy_from_user(input.path, user_path, input.path_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(input.path, input.path_len + 1); PRESTO_FREE(input.path, input.path_len + 1);
return error; return -EFAULT;
} }
input.path[input.path_len] = '\0'; input.path[input.path_len] = '\0';
...@@ -1190,7 +1156,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1190,7 +1156,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
} }
/* return the correct cookie to wait for */ /* return the correct cookie to wait for */
input.mark_what = res; input.mark_what = res;
return copy_to_user((char *)arg, &input, sizeof(input)); if (copy_to_user((char *)arg, &input, sizeof(input)))
return -EFAULT;
return 0;
} }
#ifdef CONFIG_KREINT #ifdef CONFIG_KREINT
...@@ -1211,11 +1179,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1211,11 +1179,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
char *path; char *path;
} permit; } permit;
error = copy_from_user(&permit, (char *)arg, sizeof(permit)); if (copy_from_user(&permit, (char *)arg, sizeof(permit))) {
if ( error ) {
EXIT; EXIT;
return error; return -EFAULT;
} }
user_path = permit.path; user_path = permit.path;
PRESTO_ALLOC(permit.path, char *, permit.path_len + 1); PRESTO_ALLOC(permit.path, char *, permit.path_len + 1);
...@@ -1223,11 +1190,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1223,11 +1190,10 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
EXIT; EXIT;
return -ENOMEM; return -ENOMEM;
} }
error = copy_from_user(permit.path, user_path, permit.path_len); if (copy_from_user(permit.path, user_path, permit.path_len)) {
if ( error ) {
EXIT; EXIT;
PRESTO_FREE(permit.path, permit.path_len + 1); PRESTO_FREE(permit.path, permit.path_len + 1);
return error; return -EFAULT;
} }
permit.path[permit.path_len] = '\0'; permit.path[permit.path_len] = '\0';
...@@ -1241,7 +1207,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file, ...@@ -1241,7 +1207,9 @@ static int presto_psdev_ioctl(struct inode *inode, struct file *file,
return error; return error;
} }
/* return the correct cookie to wait for */ /* return the correct cookie to wait for */
return copy_to_user((char *)arg, &permit, sizeof(permit)); if (copy_to_user((char *)arg, &permit, sizeof(permit)))
return -EFAULT;
return 0;
} }
default: default:
......
...@@ -315,17 +315,19 @@ exp_export(struct nfsctl_export *nxp) ...@@ -315,17 +315,19 @@ exp_export(struct nfsctl_export *nxp)
* 2: We must be able to find an inode from a filehandle. * 2: We must be able to find an inode from a filehandle.
* This means that s_export_op must be set. * This means that s_export_op must be set.
*/ */
if (((inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV)) {
|| (nxp->ex_flags & NFSEXP_FSID)) if (!(nxp->ex_flags & NFSEXP_FSID)) {
&& dprintk("exp_export: export of non-dev fs without fsid");
inode->i_sb->s_export_op) goto finish;
/* Ok, we can export it */; }
else { }
if (!inode->i_sb->s_export_op) {
dprintk("exp_export: export of invalid fs type.\n"); dprintk("exp_export: export of invalid fs type.\n");
goto finish; goto finish;
} }
if (inode->i_sb->s_export_op &&
!inode->i_sb->s_export_op->find_exported_dentry) /* Ok, we can export it */;
if (!inode->i_sb->s_export_op->find_exported_dentry)
inode->i_sb->s_export_op->find_exported_dentry = inode->i_sb->s_export_op->find_exported_dentry =
find_exported_dentry; find_exported_dentry;
......
...@@ -414,11 +414,13 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, ...@@ -414,11 +414,13 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
{ {
struct dentry *dentry; struct dentry *dentry;
struct inode *inode; struct inode *inode;
int err; int flags = O_RDONLY|O_LARGEFILE, mode = FMODE_READ, err;
/* If we get here, then the client has already done an "open", and (hopefully) /*
* checked permission - so allow OWNER_OVERRIDE in case a chmod has now revoked * If we get here, then the client has already done an "open",
* permission */ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
* in case a chmod has now revoked permission.
*/
err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
if (err) if (err)
goto out; goto out;
...@@ -443,37 +445,24 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, ...@@ -443,37 +445,24 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (err) if (err)
goto out_nfserr; goto out_nfserr;
if ((access & MAY_WRITE) && (err = get_write_access(inode)) != 0)
goto out_nfserr;
memset(filp, 0, sizeof(*filp));
filp->f_op = fops_get(inode->i_fop);
atomic_set(&filp->f_count, 1);
filp->f_dentry = dentry;
filp->f_vfsmnt = fhp->fh_export->ex_mnt;
if (access & MAY_WRITE) { if (access & MAY_WRITE) {
filp->f_flags = O_WRONLY|O_LARGEFILE; err = get_write_access(inode);
filp->f_mode = FMODE_WRITE; if (err)
goto out_nfserr;
flags = O_WRONLY|O_LARGEFILE;
mode = FMODE_WRITE;
DQUOT_INIT(inode); DQUOT_INIT(inode);
} else {
filp->f_flags = O_RDONLY|O_LARGEFILE;
filp->f_mode = FMODE_READ;
} }
err = 0; err = init_private_file(filp, dentry, mode);
if (filp->f_op && filp->f_op->open) { if (!err) {
err = filp->f_op->open(inode, filp); filp->f_flags = flags;
if (err) { filp->f_vfsmnt = fhp->fh_export->ex_mnt;
fops_put(filp->f_op); } else if (access & MAY_WRITE)
if (access & MAY_WRITE) put_write_access(inode);
put_write_access(inode);
/* I nearly added put_filp() call here, but this filp
* is really on callers stack frame. -DaveM
*/
atomic_dec(&filp->f_count);
}
}
out_nfserr: out_nfserr:
if (err) if (err)
err = nfserrno(err); err = nfserrno(err);
...@@ -490,9 +479,8 @@ nfsd_close(struct file *filp) ...@@ -490,9 +479,8 @@ nfsd_close(struct file *filp)
struct dentry *dentry = filp->f_dentry; struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
if (filp->f_op && filp->f_op->release) if (filp->f_op->release)
filp->f_op->release(inode, filp); filp->f_op->release(inode, filp);
fops_put(filp->f_op);
if (filp->f_mode & FMODE_WRITE) if (filp->f_mode & FMODE_WRITE)
put_write_access(inode); put_write_access(inode);
} }
......
...@@ -162,57 +162,107 @@ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, ...@@ -162,57 +162,107 @@ asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
} }
#endif #endif
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count) ssize_t vfs_read(struct file *file, char *buf, size_t count, loff_t *pos)
{ {
struct inode *inode = file->f_dentry->d_inode;
ssize_t ret; ssize_t ret;
struct file * file;
ret = -EBADF; if (!(file->f_mode & FMODE_READ))
file = fget(fd); return -EBADF;
if (file) { if (!file->f_op || !file->f_op->read)
if (file->f_mode & FMODE_READ) { return -EINVAL;
ret = locks_verify_area(FLOCK_VERIFY_READ, file->f_dentry->d_inode, if (pos < 0)
file, file->f_pos, count); return -EINVAL;
if (!ret) {
ssize_t (*read)(struct file *, char *, size_t, loff_t *); ret = locks_verify_area(FLOCK_VERIFY_READ, inode, file, *pos, count);
ret = -EINVAL; if (!ret) {
if (file->f_op && (read = file->f_op->read) != NULL) ret = file->f_op->read(file, buf, count, pos);
ret = read(file, buf, count, &file->f_pos);
}
}
if (ret > 0) if (ret > 0)
dnotify_parent(file->f_dentry, DN_ACCESS); dnotify_parent(file->f_dentry, DN_ACCESS);
}
return ret;
}
/*
 * vfs_write - write @count bytes from @buf to @file at offset *@pos.
 *
 * @file:  open file to write to
 * @buf:   source buffer, passed through unchanged to ->write()
 * @count: number of bytes requested
 * @pos:   pointer to the file offset to use; handed to ->write(), which
 *         may update it
 *
 * Returns -EBADF if @file does not have FMODE_WRITE set, -EINVAL if the
 * file has no ->write() operation or *@pos is negative, a non-zero
 * locks_verify_area() result if that check fails, and otherwise whatever
 * ->write() returns.  The parent directory gets a DN_MODIFY dnotify
 * event when ->write() returns a positive value.
 */
ssize_t vfs_write(struct file *file, const char *buf, size_t count, loff_t *pos)
{
	struct inode *inode = file->f_dentry->d_inode;
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || !file->f_op->write)
		return -EINVAL;
	/*
	 * Validate the offset value, not the pointer: comparing the
	 * loff_t pointer itself against 0 never rejected a negative
	 * offset, so pwrite() with pos < 0 was silently accepted.
	 */
	if (*pos < 0)
		return -EINVAL;

	ret = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, *pos, count);
	if (!ret) {
		ret = file->f_op->write(file, buf, count, pos);
		if (ret > 0)
			dnotify_parent(file->f_dentry, DN_MODIFY);
	}

	return ret;
}
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count)
{
struct file *file;
ssize_t ret = -EBADF;
file = fget(fd);
if (file) {
ret = vfs_read(file, buf, count, &file->f_pos);
fput(file); fput(file);
} }
return ret; return ret;
} }
asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count) asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count)
{ {
ssize_t ret; struct file *file;
struct file * file; ssize_t ret = -EBADF;
ret = -EBADF;
file = fget(fd); file = fget(fd);
if (file) { if (file) {
if (file->f_mode & FMODE_WRITE) { ret = vfs_write(file, buf, count, &file->f_pos);
struct inode *inode = file->f_dentry->d_inode;
ret = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file,
file->f_pos, count);
if (!ret) {
ssize_t (*write)(struct file *, const char *, size_t, loff_t *);
ret = -EINVAL;
if (file->f_op && (write = file->f_op->write) != NULL)
ret = write(file, buf, count, &file->f_pos);
}
}
if (ret > 0)
dnotify_parent(file->f_dentry, DN_MODIFY);
fput(file); fput(file);
} }
return ret; return ret;
} }
/*
 * Positional read system call: look up @fd and read through vfs_read()
 * using the caller-supplied offset.  The address of the by-value @pos
 * argument is what gets passed down, not &file->f_pos.
 *
 * Returns -EBADF when @fd does not resolve to an open file, otherwise
 * the result of vfs_read().
 */
asmlinkage ssize_t sys_pread(unsigned int fd, char *buf,
			     size_t count, loff_t pos)
{
	struct file *filp = fget(fd);
	ssize_t nread;

	if (!filp)
		return -EBADF;

	nread = vfs_read(filp, buf, count, &pos);
	fput(filp);	/* drop the reference taken by fget() */

	return nread;
}
/*
 * Positional write system call: look up @fd and write through
 * vfs_write() using the caller-supplied offset.  The address of the
 * by-value @pos argument is what gets passed down, not &file->f_pos.
 *
 * Returns -EBADF when @fd does not resolve to an open file, otherwise
 * the result of vfs_write().
 */
asmlinkage ssize_t sys_pwrite(unsigned int fd, const char *buf,
			      size_t count, loff_t pos)
{
	struct file *filp = fget(fd);
	ssize_t nwritten;

	if (!filp)
		return -EBADF;

	nwritten = vfs_write(filp, buf, count, &pos);
	fput(filp);	/* drop the reference taken by fget() */

	return nwritten;
}
static ssize_t do_readv_writev(int type, struct file *file, static ssize_t do_readv_writev(int type, struct file *file,
const struct iovec * vector, const struct iovec * vector,
...@@ -355,70 +405,3 @@ asmlinkage ssize_t sys_writev(unsigned long fd, const struct iovec * vector, ...@@ -355,70 +405,3 @@ asmlinkage ssize_t sys_writev(unsigned long fd, const struct iovec * vector,
bad_file: bad_file:
return ret; return ret;
} }
/* From the Single Unix Spec: pread & pwrite act like lseek to pos + op +
lseek back to original location. They fail just like lseek does on
non-seekable files. */
/*
 * Open-coded positional read.  Checks are made in this order:
 *   1. @fd must resolve to a file             -> else -EBADF
 *   2. the file must have FMODE_READ set      -> else -EBADF
 *   3. locks_verify_area() must return 0      -> else its result
 *   4. the file must provide ->read()         -> else -EINVAL
 *   5. @pos must not be negative              -> else -EINVAL
 * On success the ->read() result is returned and, if it is positive,
 * a DN_ACCESS dnotify event is sent to the parent directory.
 */
asmlinkage ssize_t sys_pread(unsigned int fd, char * buf,
			     size_t count, loff_t pos)
{
	ssize_t (*do_read)(struct file *, char *, size_t, loff_t *);
	struct file *filp;
	ssize_t nread = -EBADF;

	filp = fget(fd);
	if (!filp)
		return nread;

	if (filp->f_mode & FMODE_READ) {
		nread = locks_verify_area(FLOCK_VERIFY_READ,
					  filp->f_dentry->d_inode,
					  filp, pos, count);
		if (!nread) {
			nread = -EINVAL;
			if (filp->f_op &&
			    (do_read = filp->f_op->read) != NULL &&
			    pos >= 0) {
				nread = do_read(filp, buf, count, &pos);
				if (nread > 0)
					dnotify_parent(filp->f_dentry, DN_ACCESS);
			}
		}
	}

	fput(filp);	/* every path that obtained the file releases it */
	return nread;
}
/*
 * Open-coded positional write.  Checks are made in this order:
 *   1. @fd must resolve to a file             -> else -EBADF
 *   2. the file must have FMODE_WRITE set     -> else -EBADF
 *   3. locks_verify_area() must return 0      -> else its result
 *   4. the file must provide ->write()        -> else -EINVAL
 *   5. @pos must not be negative              -> else -EINVAL
 * On success the ->write() result is returned and, if it is positive,
 * a DN_MODIFY dnotify event is sent to the parent directory.
 */
asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf,
			      size_t count, loff_t pos)
{
	ssize_t (*do_write)(struct file *, const char *, size_t, loff_t *);
	struct file *filp;
	ssize_t nwritten = -EBADF;

	filp = fget(fd);
	if (!filp)
		return nwritten;

	if (filp->f_mode & FMODE_WRITE) {
		nwritten = locks_verify_area(FLOCK_VERIFY_WRITE,
					     filp->f_dentry->d_inode,
					     filp, pos, count);
		if (!nwritten) {
			nwritten = -EINVAL;
			if (filp->f_op &&
			    (do_write = filp->f_op->write) != NULL &&
			    pos >= 0) {
				nwritten = do_write(filp, buf, count, &pos);
				if (nwritten > 0)
					dnotify_parent(filp->f_dentry, DN_MODIFY);
			}
		}
	}

	fput(filp);	/* every path that obtained the file releases it */
	return nwritten;
}
...@@ -1484,13 +1484,19 @@ static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, ...@@ -1484,13 +1484,19 @@ static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
/* inserts the stat data into the tree, and then calls /* inserts the stat data into the tree, and then calls
reiserfs_new_directory (to insert ".", ".." item if new object is reiserfs_new_directory (to insert ".", ".." item if new object is
directory) or reiserfs_new_symlink (to insert symlink body if new directory) or reiserfs_new_symlink (to insert symlink body if new
object is symlink) or nothing (if new object is regular file) */ object is symlink) or nothing (if new object is regular file)
struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
struct inode * dir, int mode, NOTE! uid and gid must already be set in the inode. If we return
const char * symname, non-zero due to an error, we have to drop the quota previously allocated
int i_size, /* 0 for regular, EMTRY_DIR_SIZE for dirs, for the fresh inode. This can only be done outside a transaction, so
strlen (symname) for symlinks)*/ if we return non-zero, we also end the transaction. */
struct dentry *dentry, struct inode *inode, int * err) int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
struct inode * dir, int mode,
const char * symname,
/* 0 for regular, EMTRY_DIR_SIZE for dirs,
strlen (symname) for symlinks)*/
loff_t i_size, struct dentry *dentry,
struct inode *inode)
{ {
struct super_block * sb; struct super_block * sb;
INITIALIZE_PATH (path_to_key); INITIALIZE_PATH (path_to_key);
...@@ -1498,72 +1504,40 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, ...@@ -1498,72 +1504,40 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
struct item_head ih; struct item_head ih;
struct stat_data sd; struct stat_data sd;
int retval; int retval;
int err;
if (!dir || !dir->i_nlink) { if (!dir || !dir->i_nlink) {
*err = -EPERM; err = -EPERM;
iput(inode) ; goto out_bad_inode;
return NULL;
} }
sb = dir->i_sb; sb = dir->i_sb;
inode->i_flags = 0;//inode->i_sb->s_flags;
/* item head of new item */ /* item head of new item */
ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid; ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th)); ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
if (!ih.ih_key.k_objectid) { if (!ih.ih_key.k_objectid) {
iput(inode) ; err = -ENOMEM;
*err = -ENOMEM; goto out_bad_inode ;
return NULL;
} }
if (old_format_only (sb)) if (old_format_only (sb))
/* not a perfect generation count, as object ids can be reused, but this /* not a perfect generation count, as object ids can be reused, but
** is as good as reiserfs can do right now. ** this is as good as reiserfs can do right now.
** note that the private part of inode isn't filled in yet, we have ** note that the private part of inode isn't filled in yet, we have
** to use the directory. ** to use the directory.
*/ */
inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid); inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid);
else else
#if defined( USE_INODE_GENERATION_COUNTER ) #if defined( USE_INODE_GENERATION_COUNTER )
inode->i_generation = inode->i_generation = le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
le32_to_cpu( REISERFS_SB(sb) -> s_rs -> s_inode_generation );
#else #else
inode->i_generation = ++event; inode->i_generation = ++event;
#endif #endif
if (old_format_only (sb))
make_le_item_head (&ih, 0, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
else
make_le_item_head (&ih, 0, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
/* key to search for correct place for new stat data */
_make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
/* find proper place for inserting of stat data */
retval = search_item (sb, &key, &path_to_key);
if (retval == IO_ERROR) {
iput (inode);
*err = -EIO;
return NULL;
}
if (retval == ITEM_FOUND) {
pathrelse (&path_to_key);
iput (inode);
*err = -EEXIST;
return NULL;
}
/* fill stat data */ /* fill stat data */
inode->i_mode = mode;
inode->i_nlink = (S_ISDIR (mode) ? 2 : 1); inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);
inode->i_uid = current->fsuid;
if (dir->i_mode & S_ISGID) { /* uid and gid must already be set by the caller for quota init */
inode->i_gid = dir->i_gid;
if (S_ISDIR(mode))
inode->i_mode |= S_ISGID;
} else
inode->i_gid = current->fsgid;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_size = i_size; inode->i_size = i_size;
...@@ -1578,18 +1552,38 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, ...@@ -1578,18 +1552,38 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_trans_id = 0;
REISERFS_I(inode)->i_trans_index = 0; REISERFS_I(inode)->i_trans_index = 0;
if (old_format_only (sb))
make_le_item_head (&ih, 0, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
else
make_le_item_head (&ih, 0, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
/* key to search for correct place for new stat data */
_make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
/* find proper place for inserting of stat data */
retval = search_item (sb, &key, &path_to_key);
if (retval == IO_ERROR) {
err = -EIO;
goto out_bad_inode;
}
if (retval == ITEM_FOUND) {
pathrelse (&path_to_key);
err = -EEXIST;
goto out_bad_inode;
}
if (old_format_only (sb)) { if (old_format_only (sb)) {
if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
pathrelse (&path_to_key); pathrelse (&path_to_key);
/* i_uid or i_gid is too big to be stored in stat data v3.5 */ /* i_uid or i_gid is too big to be stored in stat data v3.5 */
iput (inode); err = -EINVAL;
*err = -EINVAL; goto out_bad_inode;
return NULL;
} }
inode2sd_v1 (&sd, inode); inode2sd_v1 (&sd, inode);
} else } else {
inode2sd (&sd, inode); inode2sd (&sd, inode);
}
// these do not go to on-disk stat data // these do not go to on-disk stat data
inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
inode->i_blksize = PAGE_SIZE; inode->i_blksize = PAGE_SIZE;
...@@ -1610,10 +1604,9 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, ...@@ -1610,10 +1604,9 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
/* insert the stat data into the tree */ /* insert the stat data into the tree */
retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd)); retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd));
if (retval) { if (retval) {
iput (inode); err = retval;
*err = retval;
reiserfs_check_path(&path_to_key) ; reiserfs_check_path(&path_to_key) ;
return NULL; goto out_bad_inode;
} }
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
...@@ -1628,19 +1621,35 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, ...@@ -1628,19 +1621,35 @@ struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size); retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size);
} }
if (retval) { if (retval) {
inode->i_nlink = 0; err = retval;
iput (inode);
*err = retval;
reiserfs_check_path(&path_to_key) ; reiserfs_check_path(&path_to_key) ;
return NULL; journal_end(th, th->t_super, th->t_blocks_allocated);
goto out_inserted_sd;
} }
insert_inode_hash (inode); insert_inode_hash (inode);
// we do not mark inode dirty: on disk content matches to the reiserfs_update_sd(th, inode);
// in-core one
reiserfs_check_path(&path_to_key) ; reiserfs_check_path(&path_to_key) ;
return inode; return 0;
/* it looks like you can easily compress these two goto targets into
* one. Keeping it like this doesn't actually hurt anything, and they
* are place holders for what the quota code actually needs.
*/
out_bad_inode:
/* Invalidate the object, nothing was inserted yet */
INODE_PKEY(inode)->k_objectid = 0;
/* dquot_drop must be done outside a transaction */
journal_end(th, th->t_super, th->t_blocks_allocated) ;
make_bad_inode(inode);
out_inserted_sd:
inode->i_nlink = 0;
th->t_trans_id = 0; /* so the caller can't use this handle later */
iput(inode);
return err;
} }
/* /*
......
...@@ -204,7 +204,7 @@ static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, ...@@ -204,7 +204,7 @@ static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
if (!jb->bitmaps[bmap_nr]) { if (!jb->bitmaps[bmap_nr]) {
jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ; jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ;
} }
set_bit(bit_nr, jb->bitmaps[bmap_nr]->data) ; set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data) ;
return 0 ; return 0 ;
} }
...@@ -550,7 +550,7 @@ int reiserfs_in_journal(struct super_block *p_s_sb, ...@@ -550,7 +550,7 @@ int reiserfs_in_journal(struct super_block *p_s_sb,
PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap ); PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap );
jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ; jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ;
if (jb->journal_list && jb->bitmaps[bmap_nr] && if (jb->journal_list && jb->bitmaps[bmap_nr] &&
test_bit(bit_nr, jb->bitmaps[bmap_nr]->data)) { test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) {
tmp_bit = find_next_zero_bit((unsigned long *) tmp_bit = find_next_zero_bit((unsigned long *)
(jb->bitmaps[bmap_nr]->data), (jb->bitmaps[bmap_nr]->data),
p_s_sb->s_blocksize << 3, bit_nr+1) ; p_s_sb->s_blocksize << 3, bit_nr+1) ;
......
...@@ -248,7 +248,7 @@ static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_ ...@@ -248,7 +248,7 @@ static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_
/* mark, that this generation number is used */ /* mark, that this generation number is used */
if (de->de_gen_number_bit_string) if (de->de_gen_number_bit_string)
set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), de->de_gen_number_bit_string); set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), (unsigned long *)de->de_gen_number_bit_string);
// calculate pointer to name and namelen // calculate pointer to name and namelen
de->de_entry_num = i; de->de_entry_num = i;
...@@ -504,7 +504,7 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in ...@@ -504,7 +504,7 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in
return -EEXIST; return -EEXIST;
} }
gen_number = find_first_zero_bit (bit_string, MAX_GENERATION_NUMBER + 1); gen_number = find_first_zero_bit ((unsigned long *)bit_string, MAX_GENERATION_NUMBER + 1);
if (gen_number > MAX_GENERATION_NUMBER) { if (gen_number > MAX_GENERATION_NUMBER) {
/* there is no free generation number */ /* there is no free generation number */
reiserfs_warning ("reiserfs_add_entry: Congratulations! we have got hash function screwed up\n"); reiserfs_warning ("reiserfs_add_entry: Congratulations! we have got hash function screwed up\n");
...@@ -552,6 +552,40 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in ...@@ -552,6 +552,40 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in
return 0; return 0;
} }
/*
 * Quota helper: discard an inode when creation has to be aborted after
 * new_inode_init() has run but before reiserfs_new_inode() was called.
 * Only use this on inodes that do not have stat data inserted into the
 * tree yet.
 *
 * Marks the inode bad, drops the reference with iput(), and always
 * returns 0.
 */
static int drop_new_inode(struct inode *inode)
{
	make_bad_inode(inode);
	iput(inode);

	return 0;
}
/*
 * Setup that must happen before reiserfs_new_inode() is called.
 *
 * DQUOT_ALLOC_INODE cannot be called inside a transaction, so these
 * bits were pulled out of reiserfs_new_inode() into this helper.  The
 * actual quota calls are not here yet; they are part of the quota patch.
 *
 * The quota init calls need to know who to charge, so the owner is
 * filled in here: i_uid comes from current->fsuid and i_mode from @mode.
 * If @dir has S_ISGID set, the new inode takes @dir's gid and, when
 * @mode describes a directory, S_ISGID is set on it as well; otherwise
 * i_gid comes from current->fsgid.
 *
 * Always returns 0.
 */
static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
{
	inode->i_mode = mode;
	inode->i_uid = current->fsuid;

	if (!(dir->i_mode & S_ISGID)) {
		/* plain directory: group comes from the creating task */
		inode->i_gid = current->fsgid;
		return 0;
	}

	/* setgid directory: group comes from the parent */
	inode->i_gid = dir->i_gid;
	if (S_ISDIR(mode))
		inode->i_mode |= S_ISGID;

	return 0;
}
// //
// a portion of this function, particularly the VFS interface portion, // a portion of this function, particularly the VFS interface portion,
...@@ -564,7 +598,6 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode) ...@@ -564,7 +598,6 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode)
{ {
int retval; int retval;
struct inode * inode; struct inode * inode;
int windex ;
int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 ; int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 ;
struct reiserfs_transaction_handle th ; struct reiserfs_transaction_handle th ;
...@@ -572,16 +605,16 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode) ...@@ -572,16 +605,16 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode)
if (!inode) { if (!inode) {
return -ENOMEM ; return -ENOMEM ;
} }
retval = new_inode_init(inode, dir, mode);
if (retval)
return retval;
lock_kernel(); lock_kernel();
journal_begin(&th, dir->i_sb, jbegin_count) ; journal_begin(&th, dir->i_sb, jbegin_count) ;
th.t_caller = "create" ; th.t_caller = "create" ;
windex = push_journal_writer("reiserfs_create") ; retval = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode);
inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval); if (retval) {
if (!inode) { goto out_failed;
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ;
unlock_kernel();
return retval;
} }
inode->i_op = &reiserfs_file_inode_operations; inode->i_op = &reiserfs_file_inode_operations;
...@@ -593,22 +626,19 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode) ...@@ -593,22 +626,19 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode)
if (retval) { if (retval) {
inode->i_nlink--; inode->i_nlink--;
reiserfs_update_sd (&th, inode); reiserfs_update_sd (&th, inode);
pop_journal_writer(windex) ;
// FIXME: should we put iput here and have stat data deleted
// in the same transaction
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
iput (inode); iput (inode);
unlock_kernel(); goto out_failed;
return retval;
} }
reiserfs_update_inode_transaction(inode) ; reiserfs_update_inode_transaction(inode) ;
reiserfs_update_inode_transaction(dir) ; reiserfs_update_inode_transaction(dir) ;
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
out_failed:
unlock_kernel(); unlock_kernel();
return 0; return retval;
} }
...@@ -623,7 +653,6 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, ...@@ -623,7 +653,6 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
{ {
int retval; int retval;
struct inode * inode; struct inode * inode;
int windex ;
struct reiserfs_transaction_handle th ; struct reiserfs_transaction_handle th ;
int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3;
...@@ -631,16 +660,16 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, ...@@ -631,16 +660,16 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
if (!inode) { if (!inode) {
return -ENOMEM ; return -ENOMEM ;
} }
retval = new_inode_init(inode, dir, mode);
if (retval)
return retval;
lock_kernel(); lock_kernel();
journal_begin(&th, dir->i_sb, jbegin_count) ; journal_begin(&th, dir->i_sb, jbegin_count) ;
windex = push_journal_writer("reiserfs_mknod") ;
inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval); retval = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode);
if (!inode) { if (retval) {
pop_journal_writer(windex) ; goto out_failed;
journal_end(&th, dir->i_sb, jbegin_count) ;
unlock_kernel();
return retval;
} }
init_special_inode(inode, mode, rdev) ; init_special_inode(inode, mode, rdev) ;
...@@ -656,18 +685,17 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, ...@@ -656,18 +685,17 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
if (retval) { if (retval) {
inode->i_nlink--; inode->i_nlink--;
reiserfs_update_sd (&th, inode); reiserfs_update_sd (&th, inode);
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
iput (inode); iput (inode);
unlock_kernel(); goto out_failed;
return retval;
} }
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
out_failed:
unlock_kernel(); unlock_kernel();
return 0; return retval;
} }
...@@ -682,33 +710,33 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) ...@@ -682,33 +710,33 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
{ {
int retval; int retval;
struct inode * inode; struct inode * inode;
int windex ;
struct reiserfs_transaction_handle th ; struct reiserfs_transaction_handle th ;
int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3;
mode = S_IFDIR | mode;
inode = new_inode(dir->i_sb) ; inode = new_inode(dir->i_sb) ;
if (!inode) { if (!inode) {
return -ENOMEM ; return -ENOMEM ;
} }
retval = new_inode_init(inode, dir, mode);
if (retval)
return retval;
lock_kernel(); lock_kernel();
journal_begin(&th, dir->i_sb, jbegin_count) ; journal_begin(&th, dir->i_sb, jbegin_count) ;
windex = push_journal_writer("reiserfs_mkdir") ;
/* inc the link count now, so another writer doesn't overflow it while /* inc the link count now, so another writer doesn't overflow it while
** we sleep later on. ** we sleep later on.
*/ */
INC_DIR_INODE_NLINK(dir) INC_DIR_INODE_NLINK(dir)
mode = S_IFDIR | mode; retval = reiserfs_new_inode (&th, dir, mode, 0/*symlink*/,
inode = reiserfs_new_inode (&th, dir, mode, 0/*symlink*/, old_format_only (dir->i_sb) ?
old_format_only (dir->i_sb) ? EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
dentry, inode, &retval); dentry, inode);
if (!inode) { if (retval) {
pop_journal_writer(windex) ;
dir->i_nlink-- ; dir->i_nlink-- ;
journal_end(&th, dir->i_sb, jbegin_count) ; goto out_failed;
unlock_kernel();
return retval;
} }
reiserfs_update_inode_transaction(inode) ; reiserfs_update_inode_transaction(inode) ;
reiserfs_update_inode_transaction(dir) ; reiserfs_update_inode_transaction(dir) ;
...@@ -723,21 +751,19 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) ...@@ -723,21 +751,19 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
inode->i_nlink = 0; inode->i_nlink = 0;
DEC_DIR_INODE_NLINK(dir); DEC_DIR_INODE_NLINK(dir);
reiserfs_update_sd (&th, inode); reiserfs_update_sd (&th, inode);
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
iput (inode); iput (inode);
unlock_kernel(); goto out_failed;
return retval;
} }
// the above add_entry did not update dir's stat data // the above add_entry did not update dir's stat data
reiserfs_update_sd (&th, dir); reiserfs_update_sd (&th, dir);
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
out_failed:
unlock_kernel(); unlock_kernel();
return 0; return retval;
} }
static inline int reiserfs_empty_dir(struct inode *inode) { static inline int reiserfs_empty_dir(struct inode *inode) {
...@@ -942,43 +968,43 @@ static int reiserfs_symlink (struct inode * dir, struct dentry * dentry, const c ...@@ -942,43 +968,43 @@ static int reiserfs_symlink (struct inode * dir, struct dentry * dentry, const c
struct inode * inode; struct inode * inode;
char * name; char * name;
int item_len; int item_len;
int windex ;
struct reiserfs_transaction_handle th ; struct reiserfs_transaction_handle th ;
int mode = S_IFLNK | S_IRWXUGO;
int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3;
inode = new_inode(dir->i_sb) ; inode = new_inode(dir->i_sb) ;
if (!inode) { if (!inode) {
return -ENOMEM ; return -ENOMEM ;
} }
retval = new_inode_init(inode, dir, mode);
if (retval) {
return retval;
}
lock_kernel();
item_len = ROUND_UP (strlen (symname)); item_len = ROUND_UP (strlen (symname));
if (item_len > MAX_DIRECT_ITEM_LEN (dir->i_sb->s_blocksize)) { if (item_len > MAX_DIRECT_ITEM_LEN (dir->i_sb->s_blocksize)) {
iput(inode) ; retval = -ENAMETOOLONG;
return -ENAMETOOLONG; drop_new_inode(inode);
goto out_failed;
} }
lock_kernel();
name = reiserfs_kmalloc (item_len, GFP_NOFS, dir->i_sb); name = reiserfs_kmalloc (item_len, GFP_NOFS, dir->i_sb);
if (!name) { if (!name) {
iput(inode) ; drop_new_inode(inode);
unlock_kernel(); retval = -ENOMEM;
return -ENOMEM; goto out_failed;
} }
memcpy (name, symname, strlen (symname)); memcpy (name, symname, strlen (symname));
padd_item (name, item_len, strlen (symname)); padd_item (name, item_len, strlen (symname));
journal_begin(&th, dir->i_sb, jbegin_count) ; journal_begin(&th, dir->i_sb, jbegin_count) ;
windex = push_journal_writer("reiserfs_symlink") ;
inode = reiserfs_new_inode (&th, dir, S_IFLNK | S_IRWXUGO, name, strlen (symname), dentry, retval = reiserfs_new_inode (&th, dir, mode, name, strlen (symname),
inode, &retval); dentry, inode);
reiserfs_kfree (name, item_len, dir->i_sb); reiserfs_kfree (name, item_len, dir->i_sb);
if (inode == 0) { /* reiserfs_new_inode iputs for us */ if (retval) { /* reiserfs_new_inode iputs for us */
pop_journal_writer(windex) ; goto out_failed;
journal_end(&th, dir->i_sb, jbegin_count) ;
unlock_kernel();
return retval;
} }
reiserfs_update_inode_transaction(inode) ; reiserfs_update_inode_transaction(inode) ;
...@@ -996,18 +1022,16 @@ static int reiserfs_symlink (struct inode * dir, struct dentry * dentry, const c ...@@ -996,18 +1022,16 @@ static int reiserfs_symlink (struct inode * dir, struct dentry * dentry, const c
if (retval) { if (retval) {
inode->i_nlink--; inode->i_nlink--;
reiserfs_update_sd (&th, inode); reiserfs_update_sd (&th, inode);
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
iput (inode); iput (inode);
unlock_kernel(); goto out_failed;
return retval;
} }
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
pop_journal_writer(windex) ;
journal_end(&th, dir->i_sb, jbegin_count) ; journal_end(&th, dir->i_sb, jbegin_count) ;
out_failed:
unlock_kernel(); unlock_kernel();
return 0; return retval;
} }
......
...@@ -839,6 +839,8 @@ static struct xor_block_template xor_block_pIII_sse = { ...@@ -839,6 +839,8 @@ static struct xor_block_template xor_block_pIII_sse = {
/* Also try the generic routines. */ /* Also try the generic routines. */
#include <asm-generic/xor.h> #include <asm-generic/xor.h>
#define cpu_has_mmx (test_bit(X86_FEATURE_MMX, boot_cpu_data.x86_capability))
#undef XOR_TRY_TEMPLATES #undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \ #define XOR_TRY_TEMPLATES \
do { \ do { \
...@@ -846,7 +848,7 @@ static struct xor_block_template xor_block_pIII_sse = { ...@@ -846,7 +848,7 @@ static struct xor_block_template xor_block_pIII_sse = {
xor_speed(&xor_block_32regs); \ xor_speed(&xor_block_32regs); \
if (cpu_has_xmm) \ if (cpu_has_xmm) \
xor_speed(&xor_block_pIII_sse); \ xor_speed(&xor_block_pIII_sse); \
if (md_cpu_has_mmx()) { \ if (cpu_has_mmx) { \
xor_speed(&xor_block_pII_mmx); \ xor_speed(&xor_block_pII_mmx); \
xor_speed(&xor_block_p5_mmx); \ xor_speed(&xor_block_p5_mmx); \
} \ } \
......
...@@ -759,6 +759,9 @@ struct inode_operations { ...@@ -759,6 +759,9 @@ struct inode_operations {
struct seq_file; struct seq_file;
extern ssize_t vfs_read(struct file *, char *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char *, size_t, loff_t *);
/* /*
* NOTE: write_inode, delete_inode, clear_inode, put_inode can be called * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
* without the big kernel lock held in all filesystems. * without the big kernel lock held in all filesystems.
......
...@@ -169,8 +169,9 @@ struct mdk_rdev_s ...@@ -169,8 +169,9 @@ struct mdk_rdev_s
struct block_device *bdev; /* block device handle */ struct block_device *bdev; /* block device handle */
mdp_super_t *sb; struct page *sb_page;
unsigned long sb_offset; mdp_super_t *sb;
unsigned long sb_offset;
int alias_device; /* device alias to the same disk */ int alias_device; /* device alias to the same disk */
int faulty; /* if faulty do not issue IO requests */ int faulty; /* if faulty do not issue IO requests */
......
...@@ -7,21 +7,21 @@ ...@@ -7,21 +7,21 @@
/* /*
* *
* Each stripe contains one buffer per disc. Each buffer can be in * Each stripe contains one buffer per disc. Each buffer can be in
* one of a number of states determined by bh_state. Changes between * one of a number of states stored in "flags". Changes between
* these states happen *almost* exclusively under a per-stripe * these states happen *almost* exclusively under a per-stripe
* spinlock. Some very specific changes can happen in b_end_io, and * spinlock. Some very specific changes can happen in bi_end_io, and
* these are not protected by the spin lock. * these are not protected by the spin lock.
* *
* The bh_state bits that are used to represent these states are: * The flag bits that are used to represent these states are:
* BH_Uptodate, BH_Lock * R5_UPTODATE and R5_LOCKED
* *
* State Empty == !Uptodate, !Lock * State Empty == !UPTODATE, !LOCK
* We have no data, and there is no active request * We have no data, and there is no active request
* State Want == !Uptodate, Lock * State Want == !UPTODATE, LOCK
* A read request is being submitted for this block * A read request is being submitted for this block
* State Dirty == Uptodate, Lock * State Dirty == UPTODATE, LOCK
* Some new data is in this buffer, and it is being written out * Some new data is in this buffer, and it is being written out
* State Clean == Uptodate, !Lock * State Clean == UPTODATE, !LOCK
* We have valid data which is the same as on disc * We have valid data which is the same as on disc
* *
* The possible state transitions are: * The possible state transitions are:
...@@ -124,24 +124,29 @@ ...@@ -124,24 +124,29 @@
* plus raid5d if it is handling it, plus one for each active request * plus raid5d if it is handling it, plus one for each active request
* on a cached buffer. * on a cached buffer.
*/ */
struct stripe_head { struct stripe_head {
struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ struct stripe_head *hash_next, **hash_pprev; /* hash pointers */
struct list_head lru; /* inactive_list or handle_list */ struct list_head lru; /* inactive_list or handle_list */
struct raid5_private_data *raid_conf; struct raid5_private_data *raid_conf;
struct buffer_head *bh_cache[MD_SB_DISKS]; /* buffered copy */ sector_t sector; /* sector of this row */
struct buffer_head *bh_read[MD_SB_DISKS]; /* read request buffers of the MD device */
struct buffer_head *bh_write[MD_SB_DISKS]; /* write request buffers of the MD device */
struct buffer_head *bh_written[MD_SB_DISKS]; /* write request buffers of the MD device that have been scheduled for write */
struct page *bh_page[MD_SB_DISKS]; /* saved bh_cache[n]->b_page when reading around the cache */
unsigned long sector; /* sector of this row */
int size; /* buffers size */
int pd_idx; /* parity disk index */ int pd_idx; /* parity disk index */
unsigned long state; /* state flags */ unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */ atomic_t count; /* nr of active thread/requests */
spinlock_t lock; spinlock_t lock;
int sync_redone; struct r5dev {
struct bio req;
struct bio_vec vec;
struct page *page;
struct bio *toread, *towrite, *written;
sector_t sector; /* sector of this page */
unsigned long flags;
} dev[1]; /* allocated with extra space depending of RAID geometry */
}; };
/* Flags */
#define R5_UPTODATE 0 /* page contains current data */
#define R5_LOCKED 1 /* IO has been submitted on "req" */
#define R5_OVERWRITE 2 /* towrite covers whole page */
/* /*
* Write method * Write method
...@@ -187,6 +192,7 @@ struct stripe_head { ...@@ -187,6 +192,7 @@ struct stripe_head {
struct disk_info { struct disk_info {
kdev_t dev; kdev_t dev;
struct block_device *bdev;
int operational; int operational;
int number; int number;
int raid_disk; int raid_disk;
...@@ -201,7 +207,6 @@ struct raid5_private_data { ...@@ -201,7 +207,6 @@ struct raid5_private_data {
mdk_thread_t *thread, *resync_thread; mdk_thread_t *thread, *resync_thread;
struct disk_info disks[MD_SB_DISKS]; struct disk_info disks[MD_SB_DISKS];
struct disk_info *spare; struct disk_info *spare;
int buffer_size;
int chunk_size, level, algorithm; int chunk_size, level, algorithm;
int raid_disks, working_disks, failed_disks; int raid_disks, working_disks, failed_disks;
int resync_parity; int resync_parity;
...@@ -210,16 +215,19 @@ struct raid5_private_data { ...@@ -210,16 +215,19 @@ struct raid5_private_data {
struct list_head handle_list; /* stripes needing handling */ struct list_head handle_list; /* stripes needing handling */
struct list_head delayed_list; /* stripes that have plugged requests */ struct list_head delayed_list; /* stripes that have plugged requests */
atomic_t preread_active_stripes; /* stripes with scheduled io */ atomic_t preread_active_stripes; /* stripes with scheduled io */
char cache_name[20];
kmem_cache_t *slab_cache; /* for allocating stripes */
/* /*
* Free stripes pool * Free stripes pool
*/ */
atomic_t active_stripes; atomic_t active_stripes;
struct list_head inactive_list; struct list_head inactive_list;
md_wait_queue_head_t wait_for_stripe; wait_queue_head_t wait_for_stripe;
int inactive_blocked; /* release of inactive stripes blocked, int inactive_blocked; /* release of inactive stripes blocked,
* waiting for 25% to be free * waiting for 25% to be free
*/ */
md_spinlock_t device_lock; spinlock_t device_lock;
int plugged; int plugged;
struct tq_struct plug_tq; struct tq_struct plug_tq;
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#define MAX_XOR_BLOCKS 5 #define MAX_XOR_BLOCKS 5
extern void xor_block(unsigned int count, struct buffer_head **bh_ptr); extern void xor_block(unsigned int count, unsigned int bytes, void **ptr);
struct xor_block_template { struct xor_block_template {
struct xor_block_template *next; struct xor_block_template *next;
......
...@@ -1841,10 +1841,10 @@ struct inode * reiserfs_iget (struct super_block * s, ...@@ -1841,10 +1841,10 @@ struct inode * reiserfs_iget (struct super_block * s,
const struct cpu_key * key); const struct cpu_key * key);
struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
struct inode * dir, int mode, struct inode * dir, int mode,
const char * symname, int item_len, const char * symname, loff_t i_size,
struct dentry *dentry, struct inode *inode, int * err); struct dentry *dentry, struct inode *inode);
int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode); int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode);
void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode); void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode);
......
...@@ -231,7 +231,7 @@ void daemonize(void) ...@@ -231,7 +231,7 @@ void daemonize(void)
/* /*
* When we die, we re-parent all our children. * When we die, we re-parent all our children.
* Try to give them to another thread in our process * Try to give them to another thread in our thread
* group, and if no such member exists, give it to * group, and if no such member exists, give it to
* the global child reaper process (ie "init") * the global child reaper process (ie "init")
*/ */
...@@ -241,8 +241,14 @@ static inline void forget_original_parent(struct task_struct * father) ...@@ -241,8 +241,14 @@ static inline void forget_original_parent(struct task_struct * father)
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
/* Next in our thread group */ /* Next in our thread group, if they're not already exiting */
reaper = next_thread(father); reaper = father;
do {
reaper = next_thread(reaper);
if (!(reaper->flags & PF_EXITING))
break;
} while (reaper != father);
if (reaper == father) if (reaper == father)
reaper = child_reaper; reaper = child_reaper;
......
...@@ -243,6 +243,8 @@ EXPORT_SYMBOL(shrink_dcache_anon); ...@@ -243,6 +243,8 @@ EXPORT_SYMBOL(shrink_dcache_anon);
EXPORT_SYMBOL(find_inode_number); EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(is_subdir); EXPORT_SYMBOL(is_subdir);
EXPORT_SYMBOL(get_unused_fd); EXPORT_SYMBOL(get_unused_fd);
EXPORT_SYMBOL(vfs_read);
EXPORT_SYMBOL(vfs_write);
EXPORT_SYMBOL(vfs_create); EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_mkdir); EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod); EXPORT_SYMBOL(vfs_mknod);
......
...@@ -949,12 +949,10 @@ svc_sock_update_bufs(struct svc_serv *serv) ...@@ -949,12 +949,10 @@ svc_sock_update_bufs(struct svc_serv *serv)
if (sock->type == SOCK_DGRAM) { if (sock->type == SOCK_DGRAM) {
/* udp sockets need large rcvbuf as all pending /* udp sockets need large rcvbuf as all pending
* requests are still in that buffer. * requests are still in that buffer.
* As outgoing requests do not wait for an
* ACK, only a moderate sndbuf is needed
*/ */
svc_sock_setbufsize(sock, svc_sock_setbufsize(sock,
5 * serv->sv_bufsz, (serv->sv_nrthreads+3) * serv->sv_bufsz,
(serv->sv_nrthreads+2)* serv->sv_bufsz); (serv->sv_nrthreads+3) * serv->sv_bufsz);
} else if (svsk->sk_sk->state != TCP_LISTEN) { } else if (svsk->sk_sk->state != TCP_LISTEN) {
printk(KERN_ERR "RPC update_bufs: permanent sock neither UDP or TCP_LISTEN\n"); printk(KERN_ERR "RPC update_bufs: permanent sock neither UDP or TCP_LISTEN\n");
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment