Commit 6d4073e8 authored by Boaz Harrosh

exofs: BUG: Avoid sbi realloc

Since the beginning we realloced the sbi structure when a device table
bigger than one was specified. (I know that was really stupid).

Then much later when "register bdi" was added (By Jens) it was
registering the pointer to sbi->bdi before the realloc.

We never saw this problem because up till now the realloc did not
do anything since the device table was small enough to fit in the
original allocation. But once we started testing with large device
tables (bigger than 28) we noticed the crash of writeback operating
on a deallocated pointer.

* Avoid the whole mess by allocating the device-table as a second array
  and get rid of the variable-sized structure and the rest of this
  mess.
* Take the chance to clean up nearby structures and comments.
* Add a needed dprint on startup to indicate the loaded layout.
* Also move the bdi registration to the very end because it will
  only fail in a low-memory condition, which would probably cause a
  failure beforehand.
  There are many more likely causes to not load before that. This
  way the error handling is made simpler. (Just doing this would be
  enough to fix the BUG)
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
parent 26ae93c2
......@@ -66,13 +66,14 @@ struct exofs_layout {
enum exofs_inode_layout_gen_functions lay_func;
unsigned s_numdevs; /* Num of devices in array */
struct osd_dev *s_ods[0]; /* Variable length */
struct osd_dev **s_ods; /* osd_dev array */
};
/*
* our extension to the in-memory superblock
*/
struct exofs_sb_info {
struct backing_dev_info bdi; /* register our bdi with VFS */
struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
int s_timeout; /* timeout for OSD operations */
uint64_t s_nextid; /* highest object ID used */
......@@ -81,15 +82,11 @@ struct exofs_sb_info {
u32 s_next_generation; /* next gen # to use */
atomic_t s_curr_pending; /* number of pending commands */
uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
struct backing_dev_info bdi; /* register our bdi with VFS */
struct pnfs_osd_data_map data_map; /* Default raid to use
* FIXME: Needed ?
*/
/* struct exofs_layout dir_layout;*/ /* Default dir layout */
struct exofs_layout layout; /* Default files layout,
* contains the variable osd_dev
* array. Keep last */
struct exofs_layout layout; /* Default files layout */
struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
};
......
......@@ -393,6 +393,8 @@ void exofs_free_sbi(struct exofs_sb_info *sbi)
osduld_put_device(od);
}
}
if (sbi->layout.s_ods != sbi->_min_one_dev)
kfree(sbi->layout.s_ods);
kfree(sbi);
}
......@@ -501,6 +503,15 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
return -EINVAL;
}
EXOFS_DBGMSG("exofs: layout: "
"num_comps=%u stripe_unit=0x%x group_width=%u "
"group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n",
numdevs,
sbi->layout.stripe_unit,
sbi->layout.group_width,
_LLU(sbi->layout.group_depth),
sbi->layout.mirrors_p1,
sbi->data_map.odm_raid_algorithm);
return 0;
}
......@@ -547,11 +558,10 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
return !(odi->systemid_len || odi->osdname_len);
}
static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
struct osd_dev *fscb_od,
unsigned table_count)
{
struct exofs_sb_info *sbi = *psbi;
struct osd_dev *fscb_od;
struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
.id = EXOFS_DEVTABLE_ID};
struct exofs_device_table *dt;
......@@ -567,8 +577,6 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
return -ENOMEM;
}
fscb_od = sbi->layout.s_ods[0];
sbi->layout.s_ods[0] = NULL;
sbi->layout.s_numdevs = 0;
ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
if (unlikely(ret)) {
......@@ -590,14 +598,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
if (likely(numdevs > 1)) {
unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
if (unlikely(!sbi)) {
sbi->layout.s_ods = kzalloc(size, GFP_KERNEL);
if (unlikely(!sbi->layout.s_ods)) {
EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
numdevs);
ret = -ENOMEM;
goto out;
}
memset(&sbi->layout.s_ods[1], 0,
size - sizeof(sbi->layout.s_ods[0]));
*psbi = sbi;
}
for (i = 0; i < numdevs; i++) {
......@@ -684,10 +691,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
if (ret)
goto free_bdi;
/* use mount options to fill superblock */
if (opts->is_osdname) {
struct osd_dev_info odi = {.systemid_len = 0};
......@@ -709,7 +712,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sbi->layout.group_width = 1;
sbi->layout.group_depth = -1;
sbi->layout.group_count = 1;
sbi->layout.s_ods[0] = od;
sbi->layout.s_ods = sbi->_min_one_dev;
sbi->layout.s_numdevs = 1;
sbi->layout.s_pid = opts->pid;
sbi->s_timeout = opts->timeout;
......@@ -757,9 +760,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
table_count = le64_to_cpu(fscb.s_dev_table_count);
if (table_count) {
ret = exofs_read_lookup_dev_table(&sbi, table_count);
ret = exofs_read_lookup_dev_table(sbi, od, table_count);
if (unlikely(ret))
goto free_sbi;
} else {
sbi->layout.s_ods[0] = od;
}
__sbi_read_stats(sbi);
......@@ -793,6 +798,12 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
goto free_sbi;
}
ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
if (ret) {
EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
goto free_sbi;
}
_exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
sbi->layout.s_pid);
if (opts->is_osdname)
......@@ -800,8 +811,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
return 0;
free_sbi:
bdi_destroy(&sbi->bdi);
free_bdi:
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
opts->dev_name, sbi->layout.s_pid, ret);
exofs_free_sbi(sbi);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment