Commit d1542e03, authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Change buffered write path to write to partial pages

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 7f5e31e1
...@@ -519,7 +519,6 @@ struct bch_page_sector { ...@@ -519,7 +519,6 @@ struct bch_page_sector {
/* i_sectors: */ /* i_sectors: */
enum { enum {
SECTOR_UNALLOCATED, SECTOR_UNALLOCATED,
SECTOR_QUOTA_RESERVED,
SECTOR_DIRTY, SECTOR_DIRTY,
SECTOR_ALLOCATED, SECTOR_ALLOCATED,
} state:2; } state:2;
...@@ -597,31 +596,6 @@ static struct bch_page_state *bch2_page_state_create(struct page *page, ...@@ -597,31 +596,6 @@ static struct bch_page_state *bch2_page_state_create(struct page *page,
return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp); return bch2_page_state(page) ?: __bch2_page_state_create(page, gfp);
} }
static void bch2_put_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct page *page)
{
struct bch_page_state *s = bch2_page_state(page);
struct disk_reservation disk_res = { 0 };
struct quota_res quota_res = { 0 };
unsigned i;
if (!s)
return;
for (i = 0; i < ARRAY_SIZE(s->s); i++) {
disk_res.sectors += s->s[i].replicas_reserved;
s->s[i].replicas_reserved = 0;
if (s->s[i].state == SECTOR_QUOTA_RESERVED) {
quota_res.sectors++;
s->s[i].state = SECTOR_UNALLOCATED;
}
}
bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &disk_res);
}
static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode) static inline unsigned inode_nr_replicas(struct bch_fs *c, struct bch_inode_info *inode)
{ {
/* XXX: this should not be open coded */ /* XXX: this should not be open coded */
...@@ -672,100 +646,134 @@ static int bch2_get_page_disk_reservation(struct bch_fs *c, ...@@ -672,100 +646,134 @@ static int bch2_get_page_disk_reservation(struct bch_fs *c,
return 0; return 0;
} }
static int bch2_get_page_quota_reservation(struct bch_fs *c, struct bch2_page_reservation {
struct disk_reservation disk;
struct quota_res quota;
};
static void bch2_page_reservation_init(struct bch_fs *c,
struct bch_inode_info *inode,
struct bch2_page_reservation *res)
{
memset(res, 0, sizeof(*res));
res->disk.nr_replicas = inode_nr_replicas(c, inode);
}
static void bch2_page_reservation_put(struct bch_fs *c,
struct bch_inode_info *inode, struct bch_inode_info *inode,
struct page *page, bool check_enospc) struct bch2_page_reservation *res)
{
bch2_disk_reservation_put(c, &res->disk);
bch2_quota_reservation_put(c, inode, &res->quota);
}
static int bch2_page_reservation_get(struct bch_fs *c,
struct bch_inode_info *inode, struct page *page,
struct bch2_page_reservation *res,
unsigned offset, unsigned len, bool check_enospc)
{ {
struct bch_page_state *s = bch2_page_state_create(page, 0); struct bch_page_state *s = bch2_page_state_create(page, 0);
struct quota_res quota_res = { 0 }; unsigned i, disk_sectors = 0, quota_sectors = 0;
unsigned i, quota_res_sectors = 0;
int ret; int ret;
if (!s) if (!s)
return -ENOMEM; return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(s->s); i++) for (i = offset / 512;
quota_res_sectors += s->s[i].state == SECTOR_UNALLOCATED; i < DIV_ROUND_UP(offset + len, 512);
i++) {
if (!quota_res_sectors) disk_sectors += sectors_to_reserve(&s->s[i],
return 0; res->disk.nr_replicas);
quota_sectors += s->s[i].state == SECTOR_UNALLOCATED;
}
ret = bch2_quota_reservation_add(c, inode, &quota_res, if (disk_sectors) {
quota_res_sectors, ret = bch2_disk_reservation_add(c, &res->disk,
check_enospc); disk_sectors,
if (unlikely(ret)) !check_enospc
return ret; ? BCH_DISK_RESERVATION_NOFAIL
: 0);
if (unlikely(ret))
return ret;
}
for (i = 0; i < ARRAY_SIZE(s->s); i++) if (quota_sectors) {
if (s->s[i].state == SECTOR_UNALLOCATED) ret = bch2_quota_reservation_add(c, inode, &res->quota,
s->s[i].state = SECTOR_QUOTA_RESERVED; quota_sectors,
check_enospc);
if (unlikely(ret)) {
struct disk_reservation tmp = {
.sectors = disk_sectors
};
bch2_disk_reservation_put(c, &tmp);
res->disk.sectors -= disk_sectors;
return ret;
}
}
return 0; return 0;
} }
static int bch2_get_page_reservation(struct bch_fs *c, struct bch_inode_info *inode,
struct page *page, bool check_enospc)
{
return bch2_get_page_disk_reservation(c, inode, page, check_enospc) ?:
bch2_get_page_quota_reservation(c, inode, page, check_enospc);
}
static void bch2_clear_page_bits(struct page *page) static void bch2_clear_page_bits(struct page *page)
{ {
struct bch_inode_info *inode = to_bch_ei(page->mapping->host); struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state *s = bch2_page_state(page); struct bch_page_state *s = bch2_page_state(page);
struct disk_reservation disk_res = { 0 };
int i, dirty_sectors = 0; int i, dirty_sectors = 0;
if (!s) if (!s)
return; return;
for (i = 0; i < ARRAY_SIZE(s->s); i++) { for (i = 0; i < ARRAY_SIZE(s->s); i++) {
disk_res.sectors += s->s[i].replicas_reserved;
s->s[i].replicas_reserved = 0;
if (s->s[i].state == SECTOR_DIRTY) { if (s->s[i].state == SECTOR_DIRTY) {
dirty_sectors++; dirty_sectors++;
s->s[i].state = SECTOR_UNALLOCATED; s->s[i].state = SECTOR_UNALLOCATED;
} }
} }
bch2_disk_reservation_put(c, &disk_res);
if (dirty_sectors) if (dirty_sectors)
i_sectors_acct(c, inode, NULL, -dirty_sectors); i_sectors_acct(c, inode, NULL, -dirty_sectors);
bch2_put_page_reservation(c, inode, page);
bch2_page_state_release(page); bch2_page_state_release(page);
} }
static void __bch2_set_page_dirty(struct address_space *mapping, struct folio *folio) static void bch2_set_page_dirty(struct bch_fs *c,
struct bch_inode_info *inode, struct page *page,
struct bch2_page_reservation *res,
unsigned offset, unsigned len)
{ {
struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_page_state *s = bch2_page_state(page);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch_page_state *s = bch2_page_state(&folio->page);
struct quota_res quota_res = { 0 };
unsigned i, dirty_sectors = 0; unsigned i, dirty_sectors = 0;
BUG_ON(!s); for (i = offset / 512;
i < DIV_ROUND_UP(offset + len, 512);
i++) {
unsigned sectors = sectors_to_reserve(&s->s[i],
res->disk.nr_replicas);
for (i = 0; i < ARRAY_SIZE(s->s); i++) { BUG_ON(sectors > res->disk.sectors);
if (s->s[i].state == SECTOR_QUOTA_RESERVED) s->s[i].replicas_reserved += sectors;
quota_res.sectors++; res->disk.sectors -= sectors;
if (s->s[i].state == SECTOR_UNALLOCATED || if (s->s[i].state == SECTOR_UNALLOCATED) {
s->s[i].state == SECTOR_QUOTA_RESERVED) {
s->s[i].state = SECTOR_DIRTY; s->s[i].state = SECTOR_DIRTY;
dirty_sectors++; dirty_sectors++;
} }
} }
if (dirty_sectors) if (dirty_sectors)
i_sectors_acct(c, inode, &quota_res, dirty_sectors); i_sectors_acct(c, inode, &res->quota, dirty_sectors);
bch2_quota_reservation_put(c, inode, &quota_res);
}
static void bch2_set_page_dirty(struct address_space *mapping, struct page *page)
{
struct folio *folio = page_folio(page);
__bch2_set_page_dirty(mapping, folio); if (!PageDirty(page))
filemap_dirty_folio(mapping, folio); filemap_dirty_folio(inode->v.i_mapping, page_folio(page));
} }
vm_fault_t bch2_page_fault(struct vm_fault *vmf) vm_fault_t bch2_page_fault(struct vm_fault *vmf)
...@@ -788,8 +796,11 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) ...@@ -788,8 +796,11 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
struct bch_inode_info *inode = file_bch_inode(file); struct bch_inode_info *inode = file_bch_inode(file);
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation res;
int ret = VM_FAULT_LOCKED; int ret = VM_FAULT_LOCKED;
bch2_page_reservation_init(c, inode, &res);
sb_start_pagefault(inode->v.i_sb); sb_start_pagefault(inode->v.i_sb);
file_update_time(file); file_update_time(file);
...@@ -809,18 +820,21 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf) ...@@ -809,18 +820,21 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
goto out; goto out;
} }
if (bch2_get_page_reservation(c, inode, page, true)) { if (bch2_page_reservation_get(c, inode, page, &res,
0, PAGE_SIZE, true)) {
unlock_page(page); unlock_page(page);
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
goto out; goto out;
} }
if (!PageDirty(page)) bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE);
bch2_set_page_dirty(mapping, page);
wait_for_stable_page(page); wait_for_stable_page(page);
out: out:
bch2_pagecache_add_put(&inode->ei_pagecache_lock); bch2_pagecache_add_put(&inode->ei_pagecache_lock);
sb_end_pagefault(inode->v.i_sb); sb_end_pagefault(inode->v.i_sb);
bch2_page_reservation_put(c, inode, &res);
return ret; return ret;
} }
...@@ -1450,12 +1464,18 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, ...@@ -1450,12 +1464,18 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
{ {
struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation *res;
pgoff_t index = pos >> PAGE_SHIFT; pgoff_t index = pos >> PAGE_SHIFT;
unsigned offset = pos & (PAGE_SIZE - 1); unsigned offset = pos & (PAGE_SIZE - 1);
struct page *page; struct page *page;
int ret = -ENOMEM; int ret = -ENOMEM;
BUG_ON(inode_unhashed(&inode->v)); res = kmalloc(sizeof(*res), GFP_KERNEL);
if (!res)
return -ENOMEM;
bch2_page_reservation_init(c, inode, res);
*fsdata = res;
bch2_pagecache_add_get(&inode->ei_pagecache_lock); bch2_pagecache_add_get(&inode->ei_pagecache_lock);
...@@ -1486,7 +1506,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, ...@@ -1486,7 +1506,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
if (ret) if (ret)
goto err; goto err;
out: out:
ret = bch2_get_page_reservation(c, inode, page, true); ret = bch2_page_reservation_get(c, inode, page, res,
offset, len, true);
if (ret) { if (ret) {
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
/* /*
...@@ -1509,6 +1530,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping, ...@@ -1509,6 +1530,8 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
*pagep = NULL; *pagep = NULL;
err_unlock: err_unlock:
bch2_pagecache_add_put(&inode->ei_pagecache_lock); bch2_pagecache_add_put(&inode->ei_pagecache_lock);
kfree(res);
*fsdata = NULL;
return ret; return ret;
} }
...@@ -1518,6 +1541,8 @@ int bch2_write_end(struct file *file, struct address_space *mapping, ...@@ -1518,6 +1541,8 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
{ {
struct bch_inode_info *inode = to_bch_ei(mapping->host); struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation *res = fsdata;
unsigned offset = pos & (PAGE_SIZE - 1);
lockdep_assert_held(&inode->v.i_rwsem); lockdep_assert_held(&inode->v.i_rwsem);
...@@ -1540,18 +1565,19 @@ int bch2_write_end(struct file *file, struct address_space *mapping, ...@@ -1540,18 +1565,19 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
if (copied) { if (copied) {
if (!PageUptodate(page)) if (!PageUptodate(page))
SetPageUptodate(page); SetPageUptodate(page);
if (!PageDirty(page))
bch2_set_page_dirty(mapping, page); bch2_set_page_dirty(c, inode, page, res, offset, copied);
inode->ei_last_dirtied = (unsigned long) current; inode->ei_last_dirtied = (unsigned long) current;
} else {
bch2_put_page_reservation(c, inode, page);
} }
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
bch2_pagecache_add_put(&inode->ei_pagecache_lock); bch2_pagecache_add_put(&inode->ei_pagecache_lock);
bch2_page_reservation_put(c, inode, res);
kfree(res);
return copied; return copied;
} }
...@@ -1564,15 +1590,19 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, ...@@ -1564,15 +1590,19 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
{ {
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct page *pages[WRITE_BATCH_PAGES]; struct page *pages[WRITE_BATCH_PAGES];
struct bch2_page_reservation res;
unsigned long index = pos >> PAGE_SHIFT; unsigned long index = pos >> PAGE_SHIFT;
unsigned offset = pos & (PAGE_SIZE - 1); unsigned offset = pos & (PAGE_SIZE - 1);
unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
unsigned i, copied = 0, nr_pages_copied = 0; unsigned i, reserved = 0, set_dirty = 0;
unsigned copied = 0, nr_pages_copied = 0;
int ret = 0; int ret = 0;
BUG_ON(!len); BUG_ON(!len);
BUG_ON(nr_pages > ARRAY_SIZE(pages)); BUG_ON(nr_pages > ARRAY_SIZE(pages));
bch2_page_reservation_init(c, inode, &res);
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
pages[i] = grab_cache_page_write_begin(mapping, index + i); pages[i] = grab_cache_page_write_begin(mapping, index + i);
if (!pages[i]) { if (!pages[i]) {
...@@ -1599,19 +1629,25 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, ...@@ -1599,19 +1629,25 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
} }
} }
for (i = 0; i < nr_pages; i++) { while (reserved < len) {
ret = bch2_get_page_reservation(c, inode, pages[i], true); struct page *page = pages[(offset + reserved) >> PAGE_SHIFT];
unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1);
if (ret && !PageUptodate(pages[i])) { unsigned pg_len = min_t(unsigned, len - reserved,
ret = bch2_read_single_page(pages[i], mapping); PAGE_SIZE - pg_offset);
if (ret) retry_reservation:
goto out; ret = bch2_page_reservation_get(c, inode, page, &res,
pg_offset, pg_len, true);
ret = bch2_get_page_reservation(c, inode, pages[i], true);
if (ret && !PageUptodate(page)) {
ret = bch2_read_single_page(page, mapping);
if (!ret)
goto retry_reservation;
} }
if (ret) if (ret)
goto out; goto out;
reserved += pg_len;
} }
if (mapping_writably_mapped(mapping)) if (mapping_writably_mapped(mapping))
...@@ -1621,16 +1657,16 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, ...@@ -1621,16 +1657,16 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
while (copied < len) { while (copied < len) {
struct page *page = pages[(offset + copied) >> PAGE_SHIFT]; struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1); unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1);
unsigned pg_bytes = min_t(unsigned, len - copied, unsigned pg_len = min_t(unsigned, len - copied,
PAGE_SIZE - pg_offset); PAGE_SIZE - pg_offset);
unsigned pg_copied = copy_page_from_iter_atomic(page, unsigned pg_copied = copy_page_from_iter_atomic(page,
pg_offset, pg_bytes, iter); pg_offset, pg_len, iter);
if (!pg_copied)
break;
flush_dcache_page(page); flush_dcache_page(page);
copied += pg_copied; copied += pg_copied;
if (pg_copied != pg_bytes)
break;
} }
if (!copied) if (!copied)
...@@ -1653,23 +1689,30 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, ...@@ -1653,23 +1689,30 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
copied -= (offset + copied) & (PAGE_SIZE - 1); copied -= (offset + copied) & (PAGE_SIZE - 1);
} }
} }
out:
for (i = 0; i < nr_pages_copied; i++) {
if (!PageUptodate(pages[i]))
SetPageUptodate(pages[i]);
if (!PageDirty(pages[i]))
bch2_set_page_dirty(mapping, pages[i]);
unlock_page(pages[i]);
put_page(pages[i]);
}
while (set_dirty < copied) {
struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT];
unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1);
unsigned pg_len = min_t(unsigned, copied - set_dirty,
PAGE_SIZE - pg_offset);
if (!PageUptodate(page))
SetPageUptodate(page);
bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len);
unlock_page(page);
put_page(page);
set_dirty += pg_len;
}
out:
for (i = nr_pages_copied; i < nr_pages; i++) { for (i = nr_pages_copied; i < nr_pages; i++) {
if (!PageDirty(pages[i]))
bch2_put_page_reservation(c, inode, pages[i]);
unlock_page(pages[i]); unlock_page(pages[i]);
put_page(pages[i]); put_page(pages[i]);
} }
bch2_page_reservation_put(c, inode, &res);
return copied ?: ret; return copied ?: ret;
} }
...@@ -2322,7 +2365,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, ...@@ -2322,7 +2365,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
* XXX: because we aren't currently tracking whether the page has actual * XXX: because we aren't currently tracking whether the page has actual
* data in it (vs. just 0s, or only partially written) this wrong. ick. * data in it (vs. just 0s, or only partially written) this wrong. ick.
*/ */
ret = bch2_get_page_reservation(c, inode, page, false); ret = bch2_get_page_disk_reservation(c, inode, page, false);
BUG_ON(ret); BUG_ON(ret);
if (index == start >> PAGE_SHIFT && if (index == start >> PAGE_SHIFT &&
...@@ -2333,8 +2376,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, ...@@ -2333,8 +2376,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
else if (index == end >> PAGE_SHIFT) else if (index == end >> PAGE_SHIFT)
zero_user_segment(page, 0, end_offset); zero_user_segment(page, 0, end_offset);
if (!PageDirty(page)) filemap_dirty_folio(mapping, page_folio(page));
bch2_set_page_dirty(mapping, page);
unlock: unlock:
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment