Commit d79b7c26 authored by Nikolay Borisov, committed by David Sterba

btrfs: Speed up btrfs_file_llseek

Modifying the file position is done on a per-file basis. This renders
holding the inode lock for writing useless and makes the performance of
concurrent llseek calls abysmal.

Fix this by holding the inode lock for read. This provides protection
against concurrent truncates, and find_desired_extent already includes
proper extent locking for the range, which ensures proper locking against
concurrent writes. SEEK_CUR and SEEK_END can be done locklessly.

The former (SEEK_CUR) is synchronized by the file::f_lock spinlock.
SEEK_END is not synchronized but is atomic, and that's OK since there is
no guarantee that SEEK_END will always be at the end of the file in the
face of tail modifications.

This change brings ~82% performance improvement when doing a lot of
parallel fseeks. The workload essentially does:

    for (d=0; d<num_seek_read; d++)
      {
	/* offset %= 16777216; */
	fseek (f, 256 * d % 16777216, SEEK_SET);
	fread (buffer, 64, 1, f);
      }

Without patch:

num workprocesses = 16
num fseek/fread = 8000000
step = 256
fork 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

real	0m41.412s
user	0m28.777s
sys	2m16.510s

With patch:

num workprocesses = 16
num fseek/fread = 8000000
step = 256
fork 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15

real	0m11.479s
user	0m27.629s
sys	0m21.040s
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 0cf25213
...@@ -3356,13 +3356,14 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) ...@@ -3356,13 +3356,14 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em = NULL; struct extent_map *em = NULL;
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
loff_t i_size = inode->i_size;
u64 lockstart; u64 lockstart;
u64 lockend; u64 lockend;
u64 start; u64 start;
u64 len; u64 len;
int ret = 0; int ret = 0;
if (inode->i_size == 0) if (i_size == 0 || *offset >= i_size)
return -ENXIO; return -ENXIO;
/* /*
...@@ -3372,8 +3373,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) ...@@ -3372,8 +3373,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
start = max_t(loff_t, 0, *offset); start = max_t(loff_t, 0, *offset);
lockstart = round_down(start, fs_info->sectorsize); lockstart = round_down(start, fs_info->sectorsize);
lockend = round_up(i_size_read(inode), lockend = round_up(i_size, fs_info->sectorsize);
fs_info->sectorsize);
if (lockend <= lockstart) if (lockend <= lockstart)
lockend = lockstart + fs_info->sectorsize; lockend = lockstart + fs_info->sectorsize;
lockend--; lockend--;
...@@ -3382,7 +3382,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) ...@@ -3382,7 +3382,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state); &cached_state);
while (start < inode->i_size) { while (start < i_size) {
em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len); em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len);
if (IS_ERR(em)) { if (IS_ERR(em)) {
ret = PTR_ERR(em); ret = PTR_ERR(em);
...@@ -3406,10 +3406,10 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) ...@@ -3406,10 +3406,10 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
} }
free_extent_map(em); free_extent_map(em);
if (!ret) { if (!ret) {
if (whence == SEEK_DATA && start >= inode->i_size) if (whence == SEEK_DATA && start >= i_size)
ret = -ENXIO; ret = -ENXIO;
else else
*offset = min_t(loff_t, start, inode->i_size); *offset = min_t(loff_t, start, i_size);
} }
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state); &cached_state);
...@@ -3421,7 +3421,6 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) ...@@ -3421,7 +3421,6 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
int ret; int ret;
inode_lock(inode);
switch (whence) { switch (whence) {
case SEEK_END: case SEEK_END:
case SEEK_CUR: case SEEK_CUR:
...@@ -3429,21 +3428,16 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) ...@@ -3429,21 +3428,16 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
goto out; goto out;
case SEEK_DATA: case SEEK_DATA:
case SEEK_HOLE: case SEEK_HOLE:
if (offset >= i_size_read(inode)) { inode_lock_shared(inode);
inode_unlock(inode);
return -ENXIO;
}
ret = find_desired_extent(inode, &offset, whence); ret = find_desired_extent(inode, &offset, whence);
if (ret) { inode_unlock_shared(inode);
inode_unlock(inode);
if (ret)
return ret; return ret;
} }
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out: out:
inode_unlock(inode);
return offset; return offset;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment