Commit f7fec032 authored by Zheng Liu's avatar Zheng Liu Committed by Theodore Ts'o

ext4: track all extent status in extent status tree

By recording the phycisal block and status, extent status tree is able
to track the status of every extents.  When we call _map_blocks
functions to lookup an extent or create a new written/unwritten/delayed
extent, this extent will be inserted into extent status tree.

We don't load all extents from disk in alloc_inode() because it costs
too much memory, and if a file is opened and closed frequently it will
takes too much time to load all extent information.  So currently when
we create/lookup an extent, this extent will be inserted into extent
status tree.  Hence, the extent status tree may not comprehensively
contain all of the extents found in the file.

Here a condition we need to take care is that an extent might contains
unwritten and delayed status simultaneously because an extent is delayed
allocated and could be allocated by fallocate.  At this time we need to
keep delayed status because later we need to update delayed reservation
space using it.
Signed-off-by: default avatarZheng Liu <wenqing.lz@taobao.com>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
Cc: Jan kara <jack@suse.cz>
parent a25a4e1a
...@@ -2602,6 +2602,9 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, ...@@ -2602,6 +2602,9 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *); struct ext4_ext_path *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *); extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode); extern int ext4_ext_check_inode(struct inode *inode);
extern int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t lblk_start,
ext4_lblk_t lblk_end);
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len); __u64 start, __u64 len);
......
...@@ -2076,8 +2076,18 @@ static int ext4_fill_fiemap_extents(struct inode *inode, ...@@ -2076,8 +2076,18 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
break; break;
} }
/* This is possible iff next == next_del == EXT_MAX_BLOCKS */ /*
if (next == next_del) { * This is possible iff next == next_del == EXT_MAX_BLOCKS.
* we need to check next == EXT_MAX_BLOCKS because it is
* possible that an extent is with unwritten and delayed
* status due to when an extent is delayed allocated and
* is allocated by fallocate status tree will track both of
* them in a extent.
*
* So we could return a unwritten and delayed extent, and
* its block is equal to 'next'.
*/
if (next == next_del && next == EXT_MAX_BLOCKS) {
flags |= FIEMAP_EXTENT_LAST; flags |= FIEMAP_EXTENT_LAST;
if (unlikely(next_del != EXT_MAX_BLOCKS || if (unlikely(next_del != EXT_MAX_BLOCKS ||
next != EXT_MAX_BLOCKS)) { next != EXT_MAX_BLOCKS)) {
...@@ -3522,9 +3532,9 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, ...@@ -3522,9 +3532,9 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
* *
* Return 1 if there is a delalloc block in the range, otherwise 0. * Return 1 if there is a delalloc block in the range, otherwise 0.
*/ */
static int ext4_find_delalloc_range(struct inode *inode, int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t lblk_start, ext4_lblk_t lblk_start,
ext4_lblk_t lblk_end) ext4_lblk_t lblk_end)
{ {
struct extent_status es; struct extent_status es;
......
...@@ -526,20 +526,26 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -526,20 +526,26 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
retval = ext4_ind_map_blocks(handle, inode, map, flags & retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE); EXT4_GET_BLOCKS_KEEP_SIZE);
} }
if (retval > 0) {
int ret;
unsigned long long status;
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
ext4_find_delalloc_range(inode, map->m_lblk,
map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk,
map->m_len, map->m_pblk, status);
if (ret < 0)
retval = ret;
}
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
up_read((&EXT4_I(inode)->i_data_sem)); up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret; int ret = check_block_validity(inode, map);
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
/* delayed alloc may be allocated by fallocate and
* coverted to initialized by directIO.
* we need to handle delayed extent here.
*/
down_write((&EXT4_I(inode)->i_data_sem));
goto delayed_mapped;
}
ret = check_block_validity(inode, map);
if (ret != 0) if (ret != 0)
return ret; return ret;
} }
...@@ -608,18 +614,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -608,18 +614,23 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
ext4_da_update_reserve_space(inode, retval, 1); ext4_da_update_reserve_space(inode, retval, 1);
} }
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { if (retval > 0) {
int ret; int ret;
delayed_mapped: unsigned long long status;
/* delayed allocation blocks has been allocated */
ret = ext4_es_remove_extent(inode, map->m_lblk, status = map->m_flags & EXT4_MAP_UNWRITTEN ?
map->m_len); EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
if (ret < 0) if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
retval = ret; ext4_find_delalloc_range(inode, map->m_lblk,
} map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
if (ret < 0)
retval = ret;
} }
up_write((&EXT4_I(inode)->i_data_sem)); up_write((&EXT4_I(inode)->i_data_sem));
...@@ -1765,6 +1776,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1765,6 +1776,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
retval = ext4_ind_map_blocks(NULL, inode, map, 0); retval = ext4_ind_map_blocks(NULL, inode, map, 0);
if (retval == 0) { if (retval == 0) {
int ret;
/* /*
* XXX: __block_prepare_write() unmaps passed block, * XXX: __block_prepare_write() unmaps passed block,
* is it OK? * is it OK?
...@@ -1772,16 +1784,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1772,16 +1784,20 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* If the block was allocated from previously allocated cluster, /* If the block was allocated from previously allocated cluster,
* then we dont need to reserve it again. */ * then we dont need to reserve it again. */
if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
retval = ext4_da_reserve_space(inode, iblock); ret = ext4_da_reserve_space(inode, iblock);
if (retval) if (ret) {
/* not enough space to reserve */ /* not enough space to reserve */
retval = ret;
goto out_unlock; goto out_unlock;
}
} }
retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
~0, EXTENT_STATUS_DELAYED); ~0, EXTENT_STATUS_DELAYED);
if (retval) if (ret) {
retval = ret;
goto out_unlock; goto out_unlock;
}
/* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
* and it should not appear on the bh->b_state. * and it should not appear on the bh->b_state.
...@@ -1791,6 +1807,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1791,6 +1807,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, invalid_block); map_bh(bh, inode->i_sb, invalid_block);
set_buffer_new(bh); set_buffer_new(bh);
set_buffer_delay(bh); set_buffer_delay(bh);
} else if (retval > 0) {
int ret;
unsigned long long status;
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
if (ret != 0)
retval = ret;
} }
out_unlock: out_unlock:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment