Commit a7fa20a5 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:
 "This adds a ->writepages() implementation to fuse, improving mmapped
  writeout and paving the way for buffered writeback.

  And there's a patch to add a fixed minor number for /dev/cuse, similarly
  to /dev/fuse"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: writepages: protect secondary requests from fuse file release
  fuse: writepages: update bdi writeout when deleting secondary request
  fuse: writepages: crop secondary requests
  fuse: writepages: roll back changes if request not found
  cuse: add fix minor number to /dev/cuse
  fuse: writepage: skip already in flight
  fuse: writepages: handle same page rewrites
  fuse: writepages: fix aggregation
  fuse: fix race in fuse_writepages()
  fuse: Implement writepages callback
  fuse: don't BUG on no write file
  fuse: lock page in mkwrite
  fuse: Prepare to handle multiple pages in writeback
  fuse: Getting file for writeback helper
parents a3012453 ce128de6
...@@ -414,6 +414,7 @@ Your cooperation is appreciated. ...@@ -414,6 +414,7 @@ Your cooperation is appreciated.
200 = /dev/net/tun TAP/TUN network device 200 = /dev/net/tun TAP/TUN network device
201 = /dev/button/gulpb Transmeta GULP-B buttons 201 = /dev/button/gulpb Transmeta GULP-B buttons
202 = /dev/emd/ctl Enhanced Metadisk RAID (EMD) control 202 = /dev/emd/ctl Enhanced Metadisk RAID (EMD) control
203 = /dev/cuse Cuse (character device in user-space)
204 = /dev/video/em8300 EM8300 DVD decoder control 204 = /dev/video/em8300 EM8300 DVD decoder control
205 = /dev/video/em8300_mv EM8300 DVD decoder video 205 = /dev/video/em8300_mv EM8300 DVD decoder video
206 = /dev/video/em8300_ma EM8300 DVD decoder audio 206 = /dev/video/em8300_ma EM8300 DVD decoder audio
......
...@@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = { ...@@ -589,11 +589,14 @@ static struct attribute *cuse_class_dev_attrs[] = {
ATTRIBUTE_GROUPS(cuse_class_dev); ATTRIBUTE_GROUPS(cuse_class_dev);
static struct miscdevice cuse_miscdev = { static struct miscdevice cuse_miscdev = {
.minor = MISC_DYNAMIC_MINOR, .minor = CUSE_MINOR,
.name = "cuse", .name = "cuse",
.fops = &cuse_channel_fops, .fops = &cuse_channel_fops,
}; };
MODULE_ALIAS_MISCDEV(CUSE_MINOR);
MODULE_ALIAS("devname:cuse");
static int __init cuse_init(void) static int __init cuse_init(void)
{ {
int i, rc; int i, rc;
......
...@@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) ...@@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
BUG_ON(req->inode != inode); BUG_ON(req->inode != inode);
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
if (curr_index == index) { if (curr_index <= index &&
index < curr_index + req->num_pages) {
found = true; found = true;
break; break;
} }
...@@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, ...@@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{ {
__free_page(req->pages[0]); int i;
fuse_file_put(req->ff, false);
for (i = 0; i < req->num_pages; i++)
__free_page(req->pages[i]);
if (req->ff)
fuse_file_put(req->ff, false);
} }
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
...@@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) ...@@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
struct inode *inode = req->inode; struct inode *inode = req->inode;
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
int i;
list_del(&req->writepages_entry); list_del(&req->writepages_entry);
dec_bdi_stat(bdi, BDI_WRITEBACK); for (i = 0; i < req->num_pages; i++) {
dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); dec_bdi_stat(bdi, BDI_WRITEBACK);
bdi_writeout_inc(bdi); dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
bdi_writeout_inc(bdi);
}
wake_up(&fi->page_waitq); wake_up(&fi->page_waitq);
} }
/* Called under fc->lock, may release and reacquire it */ /* Called under fc->lock, may release and reacquire it */
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
loff_t size)
__releases(fc->lock) __releases(fc->lock)
__acquires(fc->lock) __acquires(fc->lock)
{ {
struct fuse_inode *fi = get_fuse_inode(req->inode); struct fuse_inode *fi = get_fuse_inode(req->inode);
loff_t size = i_size_read(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_in *inarg = &req->misc.write.in;
__u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
if (!fc->connected) if (!fc->connected)
goto out_free; goto out_free;
if (inarg->offset + PAGE_CACHE_SIZE <= size) { if (inarg->offset + data_size <= size) {
inarg->size = PAGE_CACHE_SIZE; inarg->size = data_size;
} else if (inarg->offset < size) { } else if (inarg->offset < size) {
inarg->size = size & (PAGE_CACHE_SIZE - 1); inarg->size = size - inarg->offset;
} else { } else {
/* Got truncated off completely */ /* Got truncated off completely */
goto out_free; goto out_free;
...@@ -1472,12 +1482,13 @@ __acquires(fc->lock) ...@@ -1472,12 +1482,13 @@ __acquires(fc->lock)
{ {
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
size_t crop = i_size_read(inode);
struct fuse_req *req; struct fuse_req *req;
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
req = list_entry(fi->queued_writes.next, struct fuse_req, list); req = list_entry(fi->queued_writes.next, struct fuse_req, list);
list_del_init(&req->list); list_del_init(&req->list);
fuse_send_writepage(fc, req); fuse_send_writepage(fc, req, crop);
} }
} }
...@@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) ...@@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
mapping_set_error(inode->i_mapping, req->out.h.error); mapping_set_error(inode->i_mapping, req->out.h.error);
spin_lock(&fc->lock); spin_lock(&fc->lock);
while (req->misc.write.next) {
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_req *next = req->misc.write.next;
req->misc.write.next = next->misc.write.next;
next->misc.write.next = NULL;
next->ff = fuse_file_get(req->ff);
list_add(&next->writepages_entry, &fi->writepages);
/*
* Skip fuse_flush_writepages() to make it easy to crop requests
* based on primary request size.
*
* 1st case (trivial): there are no concurrent activities using
* fuse_set/release_nowrite. Then we're on safe side because
* fuse_flush_writepages() would call fuse_send_writepage()
* anyway.
*
* 2nd case: someone called fuse_set_nowrite and it is waiting
* now for completion of all in-flight requests. This happens
* rarely and no more than once per page, so this should be
* okay.
*
* 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
* of fuse_set_nowrite..fuse_release_nowrite section. The fact
* that fuse_set_nowrite returned implies that all in-flight
* requests were completed along with all of their secondary
* requests. Further primary requests are blocked by negative
* writectr. Hence there cannot be any in-flight requests and
* no invocations of fuse_writepage_end() while we're in
* fuse_set_nowrite..fuse_release_nowrite section.
*/
fuse_send_writepage(fc, next, inarg->offset + inarg->size);
}
fi->writectr--; fi->writectr--;
fuse_writepage_finish(fc, req); fuse_writepage_finish(fc, req);
spin_unlock(&fc->lock); spin_unlock(&fc->lock);
fuse_writepage_free(fc, req); fuse_writepage_free(fc, req);
} }
/*
 * Find a file that is open for writing on this inode, for use by writeback.
 *
 * Under fc->lock, the first entry of fi->write_files is looked up and
 * fuse_file_get() is called on it.  An empty list triggers WARN_ON() and
 * yields NULL, so the result must be checked by the caller.
 */
static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
					     struct fuse_inode *fi)
{
	struct fuse_file *writer = NULL;

	spin_lock(&fc->lock);
	if (WARN_ON(list_empty(&fi->write_files)))
		goto unlock;

	writer = list_entry(fi->write_files.next, struct fuse_file,
			    write_entry);
	fuse_file_get(writer);
unlock:
	spin_unlock(&fc->lock);
	return writer;
}
static int fuse_writepage_locked(struct page *page) static int fuse_writepage_locked(struct page *page)
{ {
struct address_space *mapping = page->mapping; struct address_space *mapping = page->mapping;
...@@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page) ...@@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page)
struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req; struct fuse_req *req;
struct fuse_file *ff;
struct page *tmp_page; struct page *tmp_page;
int error = -ENOMEM;
set_page_writeback(page); set_page_writeback(page);
...@@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page) ...@@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page)
if (!tmp_page) if (!tmp_page)
goto err_free; goto err_free;
spin_lock(&fc->lock); error = -EIO;
BUG_ON(list_empty(&fi->write_files)); req->ff = fuse_write_file_get(fc, fi);
ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); if (!req->ff)
req->ff = fuse_file_get(ff); goto err_free;
spin_unlock(&fc->lock);
fuse_write_fill(req, ff, page_offset(page), 0); fuse_write_fill(req, req->ff, page_offset(page), 0);
copy_highpage(tmp_page, page); copy_highpage(tmp_page, page);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
req->misc.write.next = NULL;
req->in.argpages = 1; req->in.argpages = 1;
req->num_pages = 1; req->num_pages = 1;
req->pages[0] = tmp_page; req->pages[0] = tmp_page;
...@@ -1550,19 +1611,263 @@ static int fuse_writepage_locked(struct page *page) ...@@ -1550,19 +1611,263 @@ static int fuse_writepage_locked(struct page *page)
fuse_request_free(req); fuse_request_free(req);
err: err:
end_page_writeback(page); end_page_writeback(page);
return -ENOMEM; return error;
} }
static int fuse_writepage(struct page *page, struct writeback_control *wbc) static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{ {
int err; int err;
if (fuse_page_is_writeback(page->mapping->host, page->index)) {
/*
* ->writepages() should be called for sync() and friends. We
* should only get here on direct reclaim and then we are
* allowed to skip a page which is already in flight
*/
WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
redirty_page_for_writepage(wbc, page);
return 0;
}
err = fuse_writepage_locked(page); err = fuse_writepage_locked(page);
unlock_page(page); unlock_page(page);
return err; return err;
} }
/*
 * State carried across the per-page callback invocations made on behalf of
 * fuse_writepages().
 */
struct fuse_fill_wb_data {
/* Request currently being filled with pages, or NULL if none is open. */
struct fuse_req *req;
/* File used for the write requests; looked up lazily by the fill callback. */
struct fuse_file *ff;
/* Inode whose mapping is being written back. */
struct inode *inode;
/*
 * Page cache pages corresponding to req->pages[]; writeback is ended on
 * them once the request has been queued.  Allocated by fuse_writepages()
 * with room for FUSE_MAX_PAGES_PER_REQ entries.
 */
struct page **orig_pages;
};
/*
 * Queue the request collected in @data and end writeback on the page cache
 * pages it covers.
 *
 * The request gets its own fuse_file_get() on data->ff, is appended to
 * fi->queued_writes and flushed via fuse_flush_writepages(), all under
 * fc->lock for the queueing part.
 */
static void fuse_writepages_send(struct fuse_fill_wb_data *data)
{
	struct fuse_req *req = data->req;
	struct inode *inode = data->inode;
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	/*
	 * The page count is sampled before the request is queued and req is
	 * not dereferenced afterwards (presumably because it may complete
	 * and be released once queued -- confirm against the send path).
	 */
	int remaining = req->num_pages;
	struct page **pagep = data->orig_pages;

	req->ff = fuse_file_get(data->ff);

	spin_lock(&fc->lock);
	list_add_tail(&req->list, &fi->queued_writes);
	fuse_flush_writepages(inode);
	spin_unlock(&fc->lock);

	while (remaining-- > 0)
		end_page_writeback(*pagep++);
}
/*
 * Check whether @page is already covered by a request on fi->writepages
 * and, if so, hand @new_req over to that request.
 *
 * @new_req must be a fresh request with num_pages == 0 whose pages[0]
 * already holds the temporary copy of @page.
 *
 * Returns false if no listed request covers the page; @new_req is then put
 * back on fi->writepages unchanged.  Returns true if a covering request was
 * found, in which case @new_req was either
 *  - discarded, after the current contents of @page were copied into an
 *    existing single-page request for the same index that is still in
 *    FUSE_REQ_INIT or FUSE_REQ_PENDING state, or
 *  - chained behind the found request through misc.write.next.
 */
static bool fuse_writepage_in_flight(struct fuse_req *new_req,
				     struct page *page)
{
	struct fuse_conn *fc = get_fuse_conn(new_req->inode);
	struct fuse_inode *fi = get_fuse_inode(new_req->inode);
	struct fuse_req *tmp;
	struct fuse_req *old_req;
	bool found = false;
	pgoff_t curr_index;

	BUG_ON(new_req->num_pages != 0);

	spin_lock(&fc->lock);
	/* new_req is off the list during the search; re-added if nothing hits */
	list_del(&new_req->writepages_entry);
	list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
		BUG_ON(old_req->inode != new_req->inode);
		curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (curr_index <= page->index &&
		    page->index < curr_index + old_req->num_pages) {
			found = true;
			break;
		}
	}
	if (!found) {
		list_add(&new_req->writepages_entry, &fi->writepages);
		goto out_unlock;
	}

	new_req->num_pages = 1;
	/*
	 * Walk the chain of secondary requests; prefer a single-page request
	 * for exactly this index (the last such one in the chain wins).
	 */
	for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
		BUG_ON(tmp->inode != new_req->inode);
		curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
		if (tmp->num_pages == 1 &&
		    curr_index == page->index) {
			old_req = tmp;
		}
	}

	if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
					old_req->state == FUSE_REQ_PENDING)) {
		struct backing_dev_info *bdi = page->mapping->backing_dev_info;

		/* Refresh the queued copy in place; new_req is not needed. */
		copy_highpage(old_req->pages[0], page);
		spin_unlock(&fc->lock);

		dec_bdi_stat(bdi, BDI_WRITEBACK);
		/*
		 * NR_WRITEBACK_TEMP was accounted on the temporary page held
		 * in new_req, not on the page cache page, so it must be
		 * dropped on that same page (before it is freed below).
		 */
		dec_zone_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
		bdi_writeout_inc(bdi);
		fuse_writepage_free(fc, new_req);
		fuse_request_free(new_req);
		goto out;
	} else {
		/* Insert new_req right behind old_req in the chain. */
		new_req->misc.write.next = old_req->misc.write.next;
		old_req->misc.write.next = new_req;
	}
out_unlock:
	spin_unlock(&fc->lock);
out:
	return found;
}
/*
 * Per-page callback handed to write_cache_pages() by fuse_writepages().
 *
 * Copies @page into a newly allocated temporary page and appends the copy
 * to the request being assembled in @_data (a struct fuse_fill_wb_data).
 * The request assembled so far is sent first when @page cannot be added
 * to it: the page is already under fuse writeback, the request holds
 * FUSE_MAX_PAGES_PER_REQ pages, one more page would exceed fc->max_write,
 * or the page does not directly follow the last page in the request.
 *
 * Returns 0 on success, -EIO if no file open for writing is found and
 * -ENOMEM if an allocation fails.  @page is unlocked on every path.
 */
static int fuse_writepages_fill(struct page *page,
struct writeback_control *wbc, void *_data)
{
struct fuse_fill_wb_data *data = _data;
struct fuse_req *req = data->req;
struct inode *inode = data->inode;
struct fuse_conn *fc = get_fuse_conn(inode);
struct page *tmp_page;
bool is_writeback;
int err;
/* Look up the file to write through once; it is reused for later pages. */
if (!data->ff) {
err = -EIO;
data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
if (!data->ff)
goto out_unlock;
}
/*
* Being under writeback is unlikely but possible. For example direct
* read to an mmaped fuse file will set the page dirty twice; once when
* the pages are faulted with get_user_pages(), and then after the read
* completed.
*/
is_writeback = fuse_page_is_writeback(inode, page->index);
/* Send what has been collected so far if this page cannot join it. */
if (req && req->num_pages &&
(is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
fuse_writepages_send(data);
data->req = NULL;
}
err = -ENOMEM;
tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!tmp_page)
goto out_unlock;
/*
* The page must not be redirtied until the writeout is completed
* (i.e. userspace has sent a reply to the write request). Otherwise
* there could be more than one temporary page instance for each real
* page.
*
* This is ensured by holding the page lock in page_mkwrite() while
* checking fuse_page_is_writeback(). We already hold the page lock
* since clear_page_dirty_for_io() and keep it held until we add the
* request to the fi->writepages list and increment req->num_pages.
* After this fuse_page_is_writeback() will indicate that the page is
* under writeback, so we can release the page lock.
*/
/* No open request: start a new one beginning at this page's offset. */
if (data->req == NULL) {
struct fuse_inode *fi = get_fuse_inode(inode);
err = -ENOMEM;
req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
if (!req) {
__free_page(tmp_page);
goto out_unlock;
}
fuse_write_fill(req, data->ff, page_offset(page), 0);
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
req->misc.write.next = NULL;
req->in.argpages = 1;
req->background = 1;
req->num_pages = 0;
req->end = fuse_writepage_end;
req->inode = inode;
spin_lock(&fc->lock);
list_add(&req->writepages_entry, &fi->writepages);
spin_unlock(&fc->lock);
data->req = req;
}
set_page_writeback(page);
/* Snapshot the page into the temporary copy and account for it. */
copy_highpage(tmp_page, page);
req->pages[req->num_pages] = tmp_page;
req->page_descs[req->num_pages].offset = 0;
req->page_descs[req->num_pages].length = PAGE_SIZE;
inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
err = 0;
/*
* If the page is already in flight, fuse_writepage_in_flight() takes
* over the request; drop our pointer to it so the next page starts a
* new request.
*/
if (is_writeback && fuse_writepage_in_flight(req, page)) {
end_page_writeback(page);
data->req = NULL;
goto out_unlock;
}
data->orig_pages[req->num_pages] = page;
/*
* Protected by fc->lock against concurrent access by
* fuse_page_is_writeback().
*/
spin_lock(&fc->lock);
req->num_pages++;
spin_unlock(&fc->lock);
out_unlock:
unlock_page(page);
return err;
}
/*
 * ->writepages() implementation: write back dirty pages of @mapping in
 * multi-page requests.
 *
 * write_cache_pages() feeds each page to fuse_writepages_fill(), which
 * batches them into requests; whatever request is still open afterwards
 * is sent here.  Returns -EIO for a bad inode, -ENOMEM if the page pointer
 * array cannot be allocated, 0 if a final request was sent, and otherwise
 * the result of write_cache_pages().
 */
static int fuse_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct fuse_fill_wb_data data;
	struct inode *inode = mapping->host;
	int ret;

	if (is_bad_inode(inode))
		return -EIO;

	data.inode = inode;
	data.req = NULL;
	data.ff = NULL;
	data.orig_pages = kzalloc(sizeof(struct page *) *
				  FUSE_MAX_PAGES_PER_REQ,
				  GFP_NOFS);
	if (data.orig_pages == NULL)
		return -ENOMEM;

	ret = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
	if (data.req != NULL) {
		/* Ignore errors if we can write at least one page */
		BUG_ON(data.req->num_pages == 0);
		fuse_writepages_send(&data);
		ret = 0;
	}

	/* Release the file looked up by the fill callback, if there is one. */
	if (data.ff != NULL)
		fuse_file_put(data.ff, false);

	kfree(data.orig_pages);
	return ret;
}
static int fuse_launder_page(struct page *page) static int fuse_launder_page(struct page *page)
{ {
int err = 0; int err = 0;
...@@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma) ...@@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
struct page *page = vmf->page; struct page *page = vmf->page;
/* struct inode *inode = file_inode(vma->vm_file);
* Don't use page->mapping as it may become NULL from a
* concurrent truncate. file_update_time(vma->vm_file);
*/ lock_page(page);
struct inode *inode = vma->vm_file->f_mapping->host; if (page->mapping != inode->i_mapping) {
unlock_page(page);
return VM_FAULT_NOPAGE;
}
fuse_wait_on_page_writeback(inode, page->index); fuse_wait_on_page_writeback(inode, page->index);
return 0; return VM_FAULT_LOCKED;
} }
static const struct vm_operations_struct fuse_file_vm_ops = { static const struct vm_operations_struct fuse_file_vm_ops = {
...@@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = { ...@@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
static const struct address_space_operations fuse_file_aops = { static const struct address_space_operations fuse_file_aops = {
.readpage = fuse_readpage, .readpage = fuse_readpage,
.writepage = fuse_writepage, .writepage = fuse_writepage,
.writepages = fuse_writepages,
.launder_page = fuse_launder_page, .launder_page = fuse_launder_page,
.readpages = fuse_readpages, .readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers, .set_page_dirty = __set_page_dirty_nobuffers,
......
...@@ -321,6 +321,7 @@ struct fuse_req { ...@@ -321,6 +321,7 @@ struct fuse_req {
struct { struct {
struct fuse_write_in in; struct fuse_write_in in;
struct fuse_write_out out; struct fuse_write_out out;
struct fuse_req *next;
} write; } write;
struct fuse_notify_retrieve_in retrieve_in; struct fuse_notify_retrieve_in retrieve_in;
struct fuse_lk_in lk_in; struct fuse_lk_in lk_in;
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#define I2O_MINOR 166 #define I2O_MINOR 166
#define MICROCODE_MINOR 184 #define MICROCODE_MINOR 184
#define TUN_MINOR 200 #define TUN_MINOR 200
#define CUSE_MINOR 203
#define MWAVE_MINOR 219 /* ACP/Mwave Modem */ #define MWAVE_MINOR 219 /* ACP/Mwave Modem */
#define MPT_MINOR 220 #define MPT_MINOR 220
#define MPT2SAS_MINOR 221 #define MPT2SAS_MINOR 221
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment