Commit fdc0212e authored by Zheng Liu's avatar Zheng Liu Committed by Theodore Ts'o

ext4: add physical block and status member into extent status tree

This commit adds two members in extent_status structure to let it record
physical block and extent status.  Here es_pblk is used to record both
of them because physical block only has 48 bits.  So extent status could
be stashed into it so that we can save some memory.  Now written,
unwritten, delayed and hole are defined as status.

Due to new member is added into extent status tree, all interfaces need
to be adjusted.
Signed-off-by: default avatarZheng Liu <wenqing.lz@taobao.com>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
parent 06b0c886
...@@ -179,7 +179,9 @@ static void ext4_es_print_tree(struct inode *inode) ...@@ -179,7 +179,9 @@ static void ext4_es_print_tree(struct inode *inode)
while (node) { while (node) {
struct extent_status *es; struct extent_status *es;
es = rb_entry(node, struct extent_status, rb_node); es = rb_entry(node, struct extent_status, rb_node);
printk(KERN_DEBUG " [%u/%u)", es->es_lblk, es->es_len); printk(KERN_DEBUG " [%u/%u) %llu %llx",
es->es_lblk, es->es_len,
ext4_es_pblock(es), ext4_es_status(es));
node = rb_next(node); node = rb_next(node);
} }
printk(KERN_DEBUG "\n"); printk(KERN_DEBUG "\n");
...@@ -234,7 +236,7 @@ static struct extent_status *__es_tree_search(struct rb_root *root, ...@@ -234,7 +236,7 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
* @es: delayed extent that we found * @es: delayed extent that we found
* *
* Returns the first block of the next extent after es, otherwise * Returns the first block of the next extent after es, otherwise
* EXT_MAX_BLOCKS if no delay extent is found. * EXT_MAX_BLOCKS if no extent is found.
* Delayed extent is returned via @es. * Delayed extent is returned via @es.
*/ */
ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
...@@ -249,17 +251,18 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) ...@@ -249,17 +251,18 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
read_lock(&EXT4_I(inode)->i_es_lock); read_lock(&EXT4_I(inode)->i_es_lock);
tree = &EXT4_I(inode)->i_es_tree; tree = &EXT4_I(inode)->i_es_tree;
/* find delay extent in cache firstly */ /* find extent in cache firstly */
es->es_len = es->es_pblk = 0;
if (tree->cache_es) { if (tree->cache_es) {
es1 = tree->cache_es; es1 = tree->cache_es;
if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) { if (in_range(es->es_lblk, es1->es_lblk, es1->es_len)) {
es_debug("%u cached by [%u/%u)\n", es_debug("%u cached by [%u/%u) %llu %llx\n",
es->es_lblk, es1->es_lblk, es1->es_len); es->es_lblk, es1->es_lblk, es1->es_len,
ext4_es_pblock(es1), ext4_es_status(es1));
goto out; goto out;
} }
} }
es->es_len = 0;
es1 = __es_tree_search(&tree->root, es->es_lblk); es1 = __es_tree_search(&tree->root, es->es_lblk);
out: out:
...@@ -267,6 +270,7 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) ...@@ -267,6 +270,7 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
tree->cache_es = es1; tree->cache_es = es1;
es->es_lblk = es1->es_lblk; es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len; es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
node = rb_next(&es1->rb_node); node = rb_next(&es1->rb_node);
if (node) { if (node) {
es1 = rb_entry(node, struct extent_status, rb_node); es1 = rb_entry(node, struct extent_status, rb_node);
...@@ -281,7 +285,7 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es) ...@@ -281,7 +285,7 @@ ext4_lblk_t ext4_es_find_extent(struct inode *inode, struct extent_status *es)
} }
static struct extent_status * static struct extent_status *
ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len) ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
{ {
struct extent_status *es; struct extent_status *es;
es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
...@@ -289,6 +293,7 @@ ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len) ...@@ -289,6 +293,7 @@ ext4_es_alloc_extent(ext4_lblk_t lblk, ext4_lblk_t len)
return NULL; return NULL;
es->es_lblk = lblk; es->es_lblk = lblk;
es->es_len = len; es->es_len = len;
es->es_pblk = pblk;
return es; return es;
} }
...@@ -301,6 +306,8 @@ static void ext4_es_free_extent(struct extent_status *es) ...@@ -301,6 +306,8 @@ static void ext4_es_free_extent(struct extent_status *es)
* Check whether or not two extents can be merged * Check whether or not two extents can be merged
* Condition: * Condition:
* - logical block number is contiguous * - logical block number is contiguous
* - physical block number is contiguous
* - status is equal
*/ */
static int ext4_es_can_be_merged(struct extent_status *es1, static int ext4_es_can_be_merged(struct extent_status *es1,
struct extent_status *es2) struct extent_status *es2)
...@@ -308,6 +315,13 @@ static int ext4_es_can_be_merged(struct extent_status *es1, ...@@ -308,6 +315,13 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
if (es1->es_lblk + es1->es_len != es2->es_lblk) if (es1->es_lblk + es1->es_len != es2->es_lblk)
return 0; return 0;
if (ext4_es_status(es1) != ext4_es_status(es2))
return 0;
if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
(ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
return 0;
return 1; return 1;
} }
...@@ -371,6 +385,10 @@ static int __es_insert_extent(struct ext4_es_tree *tree, ...@@ -371,6 +385,10 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
*/ */
es->es_lblk = newes->es_lblk; es->es_lblk = newes->es_lblk;
es->es_len += newes->es_len; es->es_len += newes->es_len;
if (ext4_es_is_written(es) ||
ext4_es_is_unwritten(es))
ext4_es_store_pblock(es,
newes->es_pblk);
es = ext4_es_try_to_merge_left(tree, es); es = ext4_es_try_to_merge_left(tree, es);
goto out; goto out;
} }
...@@ -388,7 +406,8 @@ static int __es_insert_extent(struct ext4_es_tree *tree, ...@@ -388,7 +406,8 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
} }
} }
es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len); es = ext4_es_alloc_extent(newes->es_lblk, newes->es_len,
newes->es_pblk);
if (!es) if (!es)
return -ENOMEM; return -ENOMEM;
rb_link_node(&es->rb_node, parent, p); rb_link_node(&es->rb_node, parent, p);
...@@ -408,21 +427,24 @@ static int __es_insert_extent(struct ext4_es_tree *tree, ...@@ -408,21 +427,24 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
* Return 0 on success, error code on failure. * Return 0 on success, error code on failure.
*/ */
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len) ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned long long status)
{ {
struct ext4_es_tree *tree; struct ext4_es_tree *tree;
struct extent_status newes; struct extent_status newes;
ext4_lblk_t end = lblk + len - 1; ext4_lblk_t end = lblk + len - 1;
int err = 0; int err = 0;
trace_ext4_es_insert_extent(inode, lblk, len); es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
es_debug("add [%u/%u) to extent status tree of inode %lu\n", lblk, len, pblk, status, inode->i_ino);
lblk, len, inode->i_ino);
BUG_ON(end < lblk); BUG_ON(end < lblk);
newes.es_lblk = lblk; newes.es_lblk = lblk;
newes.es_len = len; newes.es_len = len;
ext4_es_store_pblock(&newes, pblk);
ext4_es_store_status(&newes, status);
trace_ext4_es_insert_extent(inode, &newes);
write_lock(&EXT4_I(inode)->i_es_lock); write_lock(&EXT4_I(inode)->i_es_lock);
tree = &EXT4_I(inode)->i_es_tree; tree = &EXT4_I(inode)->i_es_tree;
...@@ -446,6 +468,7 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, ...@@ -446,6 +468,7 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
struct extent_status *es; struct extent_status *es;
struct extent_status orig_es; struct extent_status orig_es;
ext4_lblk_t len1, len2; ext4_lblk_t len1, len2;
ext4_fsblk_t block;
int err = 0; int err = 0;
es = __es_tree_search(&tree->root, lblk); es = __es_tree_search(&tree->root, lblk);
...@@ -459,6 +482,8 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, ...@@ -459,6 +482,8 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
orig_es.es_lblk = es->es_lblk; orig_es.es_lblk = es->es_lblk;
orig_es.es_len = es->es_len; orig_es.es_len = es->es_len;
orig_es.es_pblk = es->es_pblk;
len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0; len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0; len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
if (len1 > 0) if (len1 > 0)
...@@ -469,6 +494,13 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, ...@@ -469,6 +494,13 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
newes.es_lblk = end + 1; newes.es_lblk = end + 1;
newes.es_len = len2; newes.es_len = len2;
if (ext4_es_is_written(&orig_es) ||
ext4_es_is_unwritten(&orig_es)) {
block = ext4_es_pblock(&orig_es) +
orig_es.es_len - len2;
ext4_es_store_pblock(&newes, block);
}
ext4_es_store_status(&newes, ext4_es_status(&orig_es));
err = __es_insert_extent(tree, &newes); err = __es_insert_extent(tree, &newes);
if (err) { if (err) {
es->es_lblk = orig_es.es_lblk; es->es_lblk = orig_es.es_lblk;
...@@ -478,6 +510,11 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, ...@@ -478,6 +510,11 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
} else { } else {
es->es_lblk = end + 1; es->es_lblk = end + 1;
es->es_len = len2; es->es_len = len2;
if (ext4_es_is_written(es) ||
ext4_es_is_unwritten(es)) {
block = orig_es.es_pblk + orig_es.es_len - len2;
ext4_es_store_pblock(es, block);
}
} }
goto out; goto out;
} }
...@@ -502,9 +539,15 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk, ...@@ -502,9 +539,15 @@ static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
} }
if (es && es->es_lblk < end + 1) { if (es && es->es_lblk < end + 1) {
ext4_lblk_t orig_len = es->es_len;
len1 = ext4_es_end(es) - end; len1 = ext4_es_end(es) - end;
es->es_lblk = end + 1; es->es_lblk = end + 1;
es->es_len = len1; es->es_len = len1;
if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
block = es->es_pblk + orig_len - len1;
ext4_es_store_pblock(es, block);
}
} }
out: out:
......
...@@ -20,10 +20,21 @@ ...@@ -20,10 +20,21 @@
#define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #define es_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif #endif
#define EXTENT_STATUS_WRITTEN 0x80000000 /* written extent */
#define EXTENT_STATUS_UNWRITTEN 0x40000000 /* unwritten extent */
#define EXTENT_STATUS_DELAYED 0x20000000 /* delayed extent */
#define EXTENT_STATUS_HOLE 0x10000000 /* hole */
#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
EXTENT_STATUS_UNWRITTEN | \
EXTENT_STATUS_DELAYED | \
EXTENT_STATUS_HOLE)
struct extent_status { struct extent_status {
struct rb_node rb_node; struct rb_node rb_node;
ext4_lblk_t es_lblk; /* first logical block extent covers */ ext4_lblk_t es_lblk; /* first logical block extent covers */
ext4_lblk_t es_len; /* length of extent in block */ ext4_lblk_t es_len; /* length of extent in block */
ext4_fsblk_t es_pblk; /* first physical block */
}; };
struct ext4_es_tree { struct ext4_es_tree {
...@@ -36,10 +47,61 @@ extern void ext4_exit_es(void); ...@@ -36,10 +47,61 @@ extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree); extern void ext4_es_init_tree(struct ext4_es_tree *tree);
extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len); ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned long long status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len); ext4_lblk_t len);
extern ext4_lblk_t ext4_es_find_extent(struct inode *inode, extern ext4_lblk_t ext4_es_find_extent(struct inode *inode,
struct extent_status *es); struct extent_status *es);
static inline int ext4_es_is_written(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_WRITTEN);
}
static inline int ext4_es_is_unwritten(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_UNWRITTEN);
}
static inline int ext4_es_is_delayed(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_DELAYED);
}
static inline int ext4_es_is_hole(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_HOLE);
}
static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_FLAGS);
}
static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
{
return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
}
static inline void ext4_es_store_pblock(struct extent_status *es,
ext4_fsblk_t pb)
{
ext4_fsblk_t block;
block = (pb & ~EXTENT_STATUS_FLAGS) |
(es->es_pblk & EXTENT_STATUS_FLAGS);
es->es_pblk = block;
}
static inline void ext4_es_store_status(struct extent_status *es,
unsigned long long status)
{
ext4_fsblk_t block;
block = (status & EXTENT_STATUS_FLAGS) |
(es->es_pblk & ~EXTENT_STATUS_FLAGS);
es->es_pblk = block;
}
#endif /* _EXT4_EXTENTS_STATUS_H */ #endif /* _EXT4_EXTENTS_STATUS_H */
...@@ -1784,7 +1784,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, ...@@ -1784,7 +1784,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
goto out_unlock; goto out_unlock;
} }
retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len); retval = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
~0, EXTENT_STATUS_DELAYED);
if (retval) if (retval)
goto out_unlock; goto out_unlock;
......
...@@ -2093,28 +2093,33 @@ TRACE_EVENT(ext4_ext_remove_space_done, ...@@ -2093,28 +2093,33 @@ TRACE_EVENT(ext4_ext_remove_space_done,
); );
TRACE_EVENT(ext4_es_insert_extent, TRACE_EVENT(ext4_es_insert_extent,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len), TP_PROTO(struct inode *inode, struct extent_status *es),
TP_ARGS(inode, lblk, len), TP_ARGS(inode, es),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
__field( ino_t, ino ) __field( ino_t, ino )
__field( loff_t, lblk ) __field( ext4_lblk_t, lblk )
__field( loff_t, len ) __field( ext4_lblk_t, len )
__field( ext4_fsblk_t, pblk )
__field( unsigned long long, status )
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = inode->i_sb->s_dev; __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->lblk = lblk; __entry->lblk = es->es_lblk;
__entry->len = len; __entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es);
__entry->status = ext4_es_status(es);
), ),
TP_printk("dev %d,%d ino %lu es [%lld/%lld)", TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (unsigned long) __entry->ino,
__entry->lblk, __entry->len) __entry->lblk, __entry->len,
__entry->pblk, __entry->status)
); );
TRACE_EVENT(ext4_es_remove_extent, TRACE_EVENT(ext4_es_remove_extent,
...@@ -2175,6 +2180,8 @@ TRACE_EVENT(ext4_es_find_extent_exit, ...@@ -2175,6 +2180,8 @@ TRACE_EVENT(ext4_es_find_extent_exit,
__field( ino_t, ino ) __field( ino_t, ino )
__field( ext4_lblk_t, lblk ) __field( ext4_lblk_t, lblk )
__field( ext4_lblk_t, len ) __field( ext4_lblk_t, len )
__field( ext4_fsblk_t, pblk )
__field( unsigned long long, status )
__field( ext4_lblk_t, ret ) __field( ext4_lblk_t, ret )
), ),
...@@ -2183,13 +2190,16 @@ TRACE_EVENT(ext4_es_find_extent_exit, ...@@ -2183,13 +2190,16 @@ TRACE_EVENT(ext4_es_find_extent_exit,
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->lblk = es->es_lblk; __entry->lblk = es->es_lblk;
__entry->len = es->es_len; __entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es);
__entry->status = ext4_es_status(es);
__entry->ret = ret; __entry->ret = ret;
), ),
TP_printk("dev %d,%d ino %lu es [%u/%u) ret %u", TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %llx ret %u",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (unsigned long) __entry->ino,
__entry->lblk, __entry->len, __entry->ret) __entry->lblk, __entry->len,
__entry->pblk, __entry->status, __entry->ret)
); );
#endif /* _TRACE_EXT4_H */ #endif /* _TRACE_EXT4_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment