Commit a61638bc, authored by Andrew Morton and committed by Linus Torvalds

[PATCH] reiserfs: allow multiple block insertion into the tree

I've had these reiserfs patches in -mm for many months.  We've been
undecided because they trigger bugs in a couple of apps.  But those apps
are now fixed, so it's best to get these speedups in.


From: Oleg Drokin <green@namesys.com>

This patch allows insertion of more than one "indirect" block pointer into
the tree in reiserfs (with all the necessary balancing code changes).
The first user of that feature is the hole-creation code, which is now ~1000
times more CPU-efficient for the case of large holes.
parent 130a14f4
...@@ -319,8 +319,6 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -319,8 +319,6 @@ static int balance_leaf (struct tree_balance * tb,
int new_item_len; int new_item_len;
int version; int version;
RFALSE (!is_direct_le_ih (ih),
"PAP-12075: only direct inserted item can be broken. %h", ih);
ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1); ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);
/* Calculate item length to insert to S[0] */ /* Calculate item length to insert to S[0] */
...@@ -343,7 +341,7 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -343,7 +341,7 @@ static int balance_leaf (struct tree_balance * tb,
version = ih_version (ih); version = ih_version (ih);
/* Calculate key component, item length and body to insert into S[0] */ /* Calculate key component, item length and body to insert into S[0] */
set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + tb->lbytes ); set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
put_ih_item_len( ih, new_item_len ); put_ih_item_len( ih, new_item_len );
if ( tb->lbytes > zeros_num ) { if ( tb->lbytes > zeros_num ) {
...@@ -452,23 +450,28 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -452,23 +450,28 @@ static int balance_leaf (struct tree_balance * tb,
ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)), ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
l_n,body, zeros_num > l_n ? l_n : zeros_num l_n,body, zeros_num > l_n ? l_n : zeros_num
); );
RFALSE( l_n &&
is_indirect_le_ih(B_N_PITEM_HEAD
(tb->L[0],
n + item_pos - ret_val)),
"PAP-12110: pasting more than 1 unformatted node pointer into indirect item");
/* 0-th item in S0 can be only of DIRECT type when l_n != 0*/ /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
{ {
int version; int version;
int temp_l = l_n;
version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + l_n); "PAP-12106: item length must be 0");
version = ih_version (B_N_PITEM_HEAD(tb->CFL[0],tb->lkey[0])); RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), B_N_PKEY (tb->L[0],
le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + l_n); n + item_pos - ret_val)),
"PAP-12107: items must be of the same file");
if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
n + item_pos - ret_val))) {
temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
}
/* update key of first item in S0 */
version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
/* update left delimiting key */
set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
} }
/* Calculate new body, position in item and insert_size[0] */ /* Calculate new body, position in item and insert_size[0] */
...@@ -537,7 +540,7 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -537,7 +540,7 @@ static int balance_leaf (struct tree_balance * tb,
); );
/* if appended item is indirect item, put unformatted node into un list */ /* if appended item is indirect item, put unformatted node into un list */
if (is_indirect_le_ih (pasted)) if (is_indirect_le_ih (pasted))
set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); set_ih_free_space (pasted, 0);
tb->insert_size[0] = 0; tb->insert_size[0] = 0;
zeros_num = 0; zeros_num = 0;
} }
...@@ -565,15 +568,11 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -565,15 +568,11 @@ static int balance_leaf (struct tree_balance * tb,
{ /* new item or its part falls to R[0] */ { /* new item or its part falls to R[0] */
if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
{ /* part of new item falls into R[0] */ { /* part of new item falls into R[0] */
int old_key_comp, old_len, r_zeros_number; loff_t old_key_comp, old_len, r_zeros_number;
const char * r_body; const char * r_body;
int version; int version;
loff_t offset; loff_t offset;
RFALSE( !is_direct_le_ih (ih),
"PAP-12135: only direct item can be split. (%h)",
ih);
leaf_shift_right(tb,tb->rnum[0]-1,-1); leaf_shift_right(tb,tb->rnum[0]-1,-1);
version = ih_version(ih); version = ih_version(ih);
...@@ -582,7 +581,7 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -582,7 +581,7 @@ static int balance_leaf (struct tree_balance * tb,
old_len = ih_item_len(ih); old_len = ih_item_len(ih);
/* Calculate key component and item length to insert into R[0] */ /* Calculate key component and item length to insert into R[0] */
offset = le_ih_k_offset( ih ) + (old_len - tb->rbytes ); offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0));
set_le_ih_k_offset( ih, offset ); set_le_ih_k_offset( ih, offset );
put_ih_item_len( ih, tb->rbytes); put_ih_item_len( ih, tb->rbytes);
/* Insert part of the item into R[0] */ /* Insert part of the item into R[0] */
...@@ -590,13 +589,13 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -590,13 +589,13 @@ static int balance_leaf (struct tree_balance * tb,
bi.bi_bh = tb->R[0]; bi.bi_bh = tb->R[0];
bi.bi_parent = tb->FR[0]; bi.bi_parent = tb->FR[0];
bi.bi_position = get_right_neighbor_position (tb, 0); bi.bi_position = get_right_neighbor_position (tb, 0);
if ( offset - old_key_comp > zeros_num ) { if ( (old_len - tb->rbytes) > zeros_num ) {
r_zeros_number = 0; r_zeros_number = 0;
r_body = body + offset - old_key_comp - zeros_num; r_body = body + (old_len - tb->rbytes) - zeros_num;
} }
else { else {
r_body = body; r_body = body;
r_zeros_number = zeros_num - (offset - old_key_comp); r_zeros_number = zeros_num - (old_len - tb->rbytes);
zeros_num -= r_zeros_number; zeros_num -= r_zeros_number;
} }
...@@ -707,12 +706,17 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -707,12 +706,17 @@ static int balance_leaf (struct tree_balance * tb,
{ {
int version; int version;
unsigned long temp_rem = n_rem;
version = ih_version (B_N_PITEM_HEAD (tb->R[0],0)); version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
UNFM_P_SHIFT);
}
set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0), set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + n_rem); le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]), set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + n_rem); le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
} }
/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
...@@ -736,13 +740,12 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -736,13 +740,12 @@ static int balance_leaf (struct tree_balance * tb,
leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);
if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
#if 0
RFALSE( n_rem, RFALSE( n_rem,
"PAP-12160: paste more than one unformatted node pointer"); "PAP-12160: paste more than one unformatted node pointer");
#endif
set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), ((struct unfm_nodeinfo*)body)->unfm_freespace); set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
} }
tb->insert_size[0] = n_rem; tb->insert_size[0] = n_rem;
if ( ! n_rem ) if ( ! n_rem )
pos_in_item ++; pos_in_item ++;
...@@ -781,7 +784,7 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -781,7 +784,7 @@ static int balance_leaf (struct tree_balance * tb,
} }
if (is_indirect_le_ih (pasted)) if (is_indirect_le_ih (pasted))
set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); set_ih_free_space (pasted, 0);
zeros_num = tb->insert_size[0] = 0; zeros_num = tb->insert_size[0] = 0;
} }
} }
...@@ -858,12 +861,6 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -858,12 +861,6 @@ static int balance_leaf (struct tree_balance * tb,
const char * r_body; const char * r_body;
int version; int version;
RFALSE( !is_direct_le_ih(ih),
/* The items which can be inserted are:
Stat_data item, direct item, indirect item and directory item which consist of only two entries "." and "..".
These items must not be broken except for a direct one. */
"PAP-12205: non-direct item can not be broken when inserting");
/* Move snum[i]-1 items from S[0] to S_new[i] */ /* Move snum[i]-1 items from S[0] to S_new[i] */
leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]); leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
/* Remember key component and item length */ /* Remember key component and item length */
...@@ -873,7 +870,7 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -873,7 +870,7 @@ static int balance_leaf (struct tree_balance * tb,
/* Calculate key component and item length to insert into S_new[i] */ /* Calculate key component and item length to insert into S_new[i] */
set_le_ih_k_offset( ih, set_le_ih_k_offset( ih,
le_ih_k_offset(ih) + (old_len - sbytes[i] ) ); le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
put_ih_item_len( ih, sbytes[i] ); put_ih_item_len( ih, sbytes[i] );
...@@ -883,13 +880,13 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -883,13 +880,13 @@ static int balance_leaf (struct tree_balance * tb,
bi.bi_parent = 0; bi.bi_parent = 0;
bi.bi_position = 0; bi.bi_position = 0;
if ( le_ih_k_offset (ih) - old_key_comp > zeros_num ) { if ( (old_len - sbytes[i]) > zeros_num ) {
r_zeros_number = 0; r_zeros_number = 0;
r_body = body + (le_ih_k_offset(ih) - old_key_comp) - zeros_num; r_body = body + (old_len - sbytes[i]) - zeros_num;
} }
else { else {
r_body = body; r_body = body;
r_zeros_number = zeros_num - (le_ih_k_offset (ih) - old_key_comp); r_zeros_number = zeros_num - (old_len - sbytes[i]);
zeros_num -= r_zeros_number; zeros_num -= r_zeros_number;
} }
...@@ -1010,11 +1007,13 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -1010,11 +1007,13 @@ static int balance_leaf (struct tree_balance * tb,
tmp = B_N_PITEM_HEAD(S_new[i],0); tmp = B_N_PITEM_HEAD(S_new[i],0);
if (is_indirect_le_ih (tmp)) { if (is_indirect_le_ih (tmp)) {
if (n_rem) set_ih_free_space (tmp, 0);
reiserfs_panic (tb->tb_sb, "PAP-12230: balance_leaf: invalid action with indirect item"); set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
set_ih_free_space (tmp, ((struct unfm_nodeinfo*)body)->unfm_freespace); (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
} else {
set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
n_rem );
} }
set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + n_rem );
} }
tb->insert_size[0] = n_rem; tb->insert_size[0] = n_rem;
...@@ -1060,7 +1059,7 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -1060,7 +1059,7 @@ static int balance_leaf (struct tree_balance * tb,
/* if we paste to indirect item update ih_free_space */ /* if we paste to indirect item update ih_free_space */
if (is_indirect_le_ih (pasted)) if (is_indirect_le_ih (pasted))
set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); set_ih_free_space (pasted, 0);
zeros_num = tb->insert_size[0] = 0; zeros_num = tb->insert_size[0] = 0;
} }
} }
...@@ -1152,11 +1151,12 @@ static int balance_leaf (struct tree_balance * tb, ...@@ -1152,11 +1151,12 @@ static int balance_leaf (struct tree_balance * tb,
leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
if (is_indirect_le_ih (pasted)) { if (is_indirect_le_ih (pasted)) {
#if 0
RFALSE( tb->insert_size[0] != UNFM_P_SIZE, RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
"PAP-12280: insert_size for indirect item must be %d, not %d", "PAP-12280: insert_size for indirect item must be %d, not %d",
UNFM_P_SIZE, tb->insert_size[0]); UNFM_P_SIZE, tb->insert_size[0]);
set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); #endif
set_ih_free_space (pasted, 0);
} }
tb->insert_size[0] = 0; tb->insert_size[0] = 0;
} }
......
...@@ -766,7 +766,11 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -766,7 +766,11 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
pointer to 'block'-th block use block, which is already pointer to 'block'-th block use block, which is already
allocated */ allocated */
struct cpu_key tmp_key; struct cpu_key tmp_key;
struct unfm_nodeinfo un = {0, 0}; unp_t unf_single=0; // We use this in case we need to allocate only
// one block which is a fastpath
unp_t *un;
__u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
__u64 blocks_needed;
RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
"vs-804: invalid position for append"); "vs-804: invalid position for append");
...@@ -775,30 +779,58 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -775,30 +779,58 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize), le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
//pos_in_item * inode->i_sb->s_blocksize, //pos_in_item * inode->i_sb->s_blocksize,
TYPE_INDIRECT, 3);// key type is unimportant TYPE_INDIRECT, 3);// key type is unimportant
if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) { blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
RFALSE( blocks_needed < 0, "green-805: invalid offset");
if ( blocks_needed == 1 ) {
un = &unf_single;
} else {
un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
GFP_ATOMIC); // We need to avoid scheduling.
if ( !un) {
un = &unf_single;
blocks_needed = 1;
max_to_insert = 0;
} else
memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
}
if ( blocks_needed <= max_to_insert) {
/* we are going to add target block to the file. Use allocated /* we are going to add target block to the file. Use allocated
block for that */ block for that */
un.unfm_nodenum = cpu_to_le32 (allocated_block_nr); un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
set_block_dev_mapped (bh_result, allocated_block_nr, inode); set_block_dev_mapped (bh_result, allocated_block_nr, inode);
set_buffer_new(bh_result); set_buffer_new(bh_result);
done = 1; done = 1;
} else { } else {
/* paste hole to the indirect item */ /* paste hole to the indirect item */
/* If kmalloc failed, max_to_insert becomes zero and it means we
only have space for one block */
blocks_needed=max_to_insert?max_to_insert:1;
} }
retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE); retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);
if (blocks_needed != 1)
kfree(un);
if (retval) { if (retval) {
reiserfs_free_block (&th, allocated_block_nr); reiserfs_free_block (&th, allocated_block_nr);
goto failure; goto failure;
} }
if (un.unfm_nodenum) if (done) {
inode->i_blocks += inode->i_sb->s_blocksize / 512; inode->i_blocks += inode->i_sb->s_blocksize / 512;
} else {
/* We need to mark new file size in case this function will be
interrupted/aborted later on. And we may do this only for
holes. */
inode->i_size += inode->i_sb->s_blocksize * blocks_needed;
}
//mark_tail_converted (inode); //mark_tail_converted (inode);
} }
if (done == 1) if (done == 1)
break; break;
/* this loop could log more blocks than we had originally asked /* this loop could log more blocks than we had originally asked
** for. So, we have to allow the transaction to end if it is ** for. So, we have to allow the transaction to end if it is
** too big or too full. Update the inode so things are ** too big or too full. Update the inode so things are
......
...@@ -30,7 +30,7 @@ int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inod ...@@ -30,7 +30,7 @@ int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inod
key of unfm pointer to be pasted */ key of unfm pointer to be pasted */
int n_blk_size, int n_blk_size,
n_retval; /* returned value for reiserfs_insert_item and clones */ n_retval; /* returned value for reiserfs_insert_item and clones */
struct unfm_nodeinfo unfm_ptr; /* Handle on an unformatted node unp_t unfm_ptr; /* Handle on an unformatted node
that will be inserted in the that will be inserted in the
tree. */ tree. */
...@@ -59,8 +59,7 @@ int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inod ...@@ -59,8 +59,7 @@ int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inod
p_le_ih = PATH_PITEM_HEAD (path); p_le_ih = PATH_PITEM_HEAD (path);
unfm_ptr.unfm_nodenum = cpu_to_le32 (unbh->b_blocknr); unfm_ptr = cpu_to_le32 (unbh->b_blocknr);
unfm_ptr.unfm_freespace = 0; // ???
if ( is_statdata_le_ih (p_le_ih) ) { if ( is_statdata_le_ih (p_le_ih) ) {
/* Insert new indirect item. */ /* Insert new indirect item. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment