Commit 9b5cf826 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'fuse-update-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:
 "As well as the usual bug fixes, this adds the following new features:

   - cached readdir and readlink

   - max I/O size increased from 128k to 1M

   - improved performance and scalability of request queues

   - copy_file_range support

  The only non-fuse bits are trivial cleanups of macros in
  <linux/bitops.h>"

* tag 'fuse-update-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (31 commits)
  fuse: enable caching of symlinks
  fuse: only invalidate atime in direct read
  fuse: don't need GETATTR after every READ
  fuse: allow fine grained attr cache invaldation
  bitops: protect variables in bit_clear_unless() macro
  bitops: protect variables in set_mask_bits() macro
  fuse: realloc page array
  fuse: add max_pages to init_out
  fuse: allocate page array more efficiently
  fuse: reduce size of struct fuse_inode
  fuse: use iversion for readdir cache verification
  fuse: use mtime for readdir cache verification
  fuse: add readdir cache version
  fuse: allow using readdir cache
  fuse: allow caching readdir
  fuse: extract fuse_emit() helper
  fuse: add FOPEN_CACHE_DIR
  fuse: split out readdir.c
  fuse: Use hash table to link processing request
  fuse: kill req->intr_unique
  ...
parents 31990f0f 5571f1e6
......@@ -5,4 +5,4 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
......@@ -107,7 +107,7 @@ static ssize_t fuse_conn_max_background_read(struct file *file,
if (!fc)
return 0;
val = fc->max_background;
val = READ_ONCE(fc->max_background);
fuse_conn_put(fc);
return fuse_conn_limit_read(file, buf, len, ppos, val);
......@@ -125,7 +125,12 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
if (ret > 0) {
struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
if (fc) {
spin_lock(&fc->bg_lock);
fc->max_background = val;
fc->blocked = fc->num_background >= fc->max_background;
if (!fc->blocked)
wake_up(&fc->blocked_waitq);
spin_unlock(&fc->bg_lock);
fuse_conn_put(fc);
}
}
......@@ -144,7 +149,7 @@ static ssize_t fuse_conn_congestion_threshold_read(struct file *file,
if (!fc)
return 0;
val = fc->congestion_threshold;
val = READ_ONCE(fc->congestion_threshold);
fuse_conn_put(fc);
return fuse_conn_limit_read(file, buf, len, ppos, val);
......@@ -155,18 +160,31 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
size_t count, loff_t *ppos)
{
unsigned uninitialized_var(val);
struct fuse_conn *fc;
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
max_user_congthresh);
if (ret > 0) {
struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
if (fc) {
fc->congestion_threshold = val;
fuse_conn_put(fc);
if (ret <= 0)
goto out;
fc = fuse_ctl_file_conn_get(file);
if (!fc)
goto out;
spin_lock(&fc->bg_lock);
fc->congestion_threshold = val;
if (fc->sb) {
if (fc->num_background < fc->congestion_threshold) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
} else {
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
}
}
spin_unlock(&fc->bg_lock);
fuse_conn_put(fc);
out:
return ret;
}
......
......@@ -25,6 +25,10 @@
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
/* Ordinary requests have even IDs, while interrupts IDs are odd */
#define FUSE_INT_REQ_BIT (1ULL << 0)
#define FUSE_REQ_ID_STEP (1ULL << 1)
static struct kmem_cache *fuse_req_cachep;
static struct fuse_dev *fuse_get_dev(struct file *file)
......@@ -40,9 +44,6 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
struct fuse_page_desc *page_descs,
unsigned npages)
{
memset(req, 0, sizeof(*req));
memset(pages, 0, sizeof(*pages) * npages);
memset(page_descs, 0, sizeof(*page_descs) * npages);
INIT_LIST_HEAD(&req->list);
INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq);
......@@ -53,30 +54,36 @@ static void fuse_request_init(struct fuse_req *req, struct page **pages,
__set_bit(FR_PENDING, &req->flags);
}
static struct page **fuse_req_pages_alloc(unsigned int npages, gfp_t flags,
struct fuse_page_desc **desc)
{
struct page **pages;
pages = kzalloc(npages * (sizeof(struct page *) +
sizeof(struct fuse_page_desc)), flags);
*desc = (void *) pages + npages * sizeof(struct page *);
return pages;
}
static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
{
struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
if (req) {
struct page **pages;
struct fuse_page_desc *page_descs;
if (npages <= FUSE_REQ_INLINE_PAGES) {
struct page **pages = NULL;
struct fuse_page_desc *page_descs = NULL;
WARN_ON(npages > FUSE_MAX_MAX_PAGES);
if (npages > FUSE_REQ_INLINE_PAGES) {
pages = fuse_req_pages_alloc(npages, flags,
&page_descs);
if (!pages) {
kmem_cache_free(fuse_req_cachep, req);
return NULL;
}
} else if (npages) {
pages = req->inline_pages;
page_descs = req->inline_page_descs;
} else {
pages = kmalloc_array(npages, sizeof(struct page *),
flags);
page_descs =
kmalloc_array(npages,
sizeof(struct fuse_page_desc),
flags);
}
if (!pages || !page_descs) {
kfree(pages);
kfree(page_descs);
kmem_cache_free(fuse_req_cachep, req);
return NULL;
}
fuse_request_init(req, pages, page_descs, npages);
......@@ -95,12 +102,41 @@ struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
return __fuse_request_alloc(npages, GFP_NOFS);
}
void fuse_request_free(struct fuse_req *req)
static void fuse_req_pages_free(struct fuse_req *req)
{
if (req->pages != req->inline_pages) {
if (req->pages != req->inline_pages)
kfree(req->pages);
kfree(req->page_descs);
}
}
bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
gfp_t flags)
{
struct page **pages;
struct fuse_page_desc *page_descs;
unsigned int npages = min_t(unsigned int,
max_t(unsigned int, req->max_pages * 2,
FUSE_DEFAULT_MAX_PAGES_PER_REQ),
fc->max_pages);
WARN_ON(npages <= req->max_pages);
pages = fuse_req_pages_alloc(npages, flags, &page_descs);
if (!pages)
return false;
memcpy(pages, req->pages, sizeof(struct page *) * req->max_pages);
memcpy(page_descs, req->page_descs,
sizeof(struct fuse_page_desc) * req->max_pages);
fuse_req_pages_free(req);
req->pages = pages;
req->page_descs = page_descs;
req->max_pages = npages;
return true;
}
void fuse_request_free(struct fuse_req *req)
{
fuse_req_pages_free(req);
kmem_cache_free(fuse_req_cachep, req);
}
......@@ -235,8 +271,10 @@ static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
struct file *file = req->stolen_file;
struct fuse_file *ff = file->private_data;
WARN_ON(req->max_pages);
spin_lock(&fc->lock);
fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
memset(req, 0, sizeof(*req));
fuse_request_init(req, NULL, NULL, 0);
BUG_ON(ff->reserved_req);
ff->reserved_req = req;
wake_up_all(&fc->reserved_req_waitq);
......@@ -287,10 +325,10 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
* We get here in the unlikely case that a background
* request was allocated but not sent
*/
spin_lock(&fc->lock);
spin_lock(&fc->bg_lock);
if (!fc->blocked)
wake_up(&fc->blocked_waitq);
spin_unlock(&fc->lock);
spin_unlock(&fc->bg_lock);
}
if (test_bit(FR_WAITING, &req->flags)) {
......@@ -319,7 +357,13 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args)
static u64 fuse_get_unique(struct fuse_iqueue *fiq)
{
return ++fiq->reqctr;
fiq->reqctr += FUSE_REQ_ID_STEP;
return fiq->reqctr;
}
static unsigned int fuse_req_hash(u64 unique)
{
return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
}
static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
......@@ -353,12 +397,13 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
static void flush_bg_queue(struct fuse_conn *fc)
{
struct fuse_iqueue *fiq = &fc->iq;
while (fc->active_background < fc->max_background &&
!list_empty(&fc->bg_queue)) {
struct fuse_req *req;
struct fuse_iqueue *fiq = &fc->iq;
req = list_entry(fc->bg_queue.next, struct fuse_req, list);
req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
list_del(&req->list);
fc->active_background++;
spin_lock(&fiq->waitq.lock);
......@@ -389,14 +434,21 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
WARN_ON(test_bit(FR_PENDING, &req->flags));
WARN_ON(test_bit(FR_SENT, &req->flags));
if (test_bit(FR_BACKGROUND, &req->flags)) {
spin_lock(&fc->lock);
spin_lock(&fc->bg_lock);
clear_bit(FR_BACKGROUND, &req->flags);
if (fc->num_background == fc->max_background)
if (fc->num_background == fc->max_background) {
fc->blocked = 0;
/* Wake up next waiter, if any */
if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
wake_up(&fc->blocked_waitq);
} else if (!fc->blocked) {
/*
* Wake up next waiter, if any. It's okay to use
* waitqueue_active(), as we've already synced up
* fc->blocked with waiters with the wake_up() call
* above.
*/
if (waitqueue_active(&fc->blocked_waitq))
wake_up(&fc->blocked_waitq);
}
if (fc->num_background == fc->congestion_threshold && fc->sb) {
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
......@@ -405,7 +457,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
fc->num_background--;
fc->active_background--;
flush_bg_queue(fc);
spin_unlock(&fc->lock);
spin_unlock(&fc->bg_lock);
}
wake_up(&req->waitq);
if (req->end)
......@@ -573,40 +625,38 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
return ret;
}
/*
* Called under fc->lock
*
* fc->connected must have been checked previously
*/
void fuse_request_send_background_locked(struct fuse_conn *fc,
struct fuse_req *req)
bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req)
{
BUG_ON(!test_bit(FR_BACKGROUND, &req->flags));
bool queued = false;
WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
if (!test_bit(FR_WAITING, &req->flags)) {
__set_bit(FR_WAITING, &req->flags);
atomic_inc(&fc->num_waiting);
}
__set_bit(FR_ISREPLY, &req->flags);
fc->num_background++;
if (fc->num_background == fc->max_background)
fc->blocked = 1;
if (fc->num_background == fc->congestion_threshold && fc->sb) {
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
spin_lock(&fc->bg_lock);
if (likely(fc->connected)) {
fc->num_background++;
if (fc->num_background == fc->max_background)
fc->blocked = 1;
if (fc->num_background == fc->congestion_threshold && fc->sb) {
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
}
list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc);
queued = true;
}
list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc);
spin_unlock(&fc->bg_lock);
return queued;
}
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
BUG_ON(!req->end);
spin_lock(&fc->lock);
if (fc->connected) {
fuse_request_send_background_locked(fc, req);
spin_unlock(&fc->lock);
} else {
spin_unlock(&fc->lock);
WARN_ON(!req->end);
if (!fuse_request_queue_background(fc, req)) {
req->out.h.error = -ENOTCONN;
req->end(fc, req);
fuse_put_request(fc, req);
......@@ -1084,12 +1134,11 @@ __releases(fiq->waitq.lock)
int err;
list_del_init(&req->intr_entry);
req->intr_unique = fuse_get_unique(fiq);
memset(&ih, 0, sizeof(ih));
memset(&arg, 0, sizeof(arg));
ih.len = reqsize;
ih.opcode = FUSE_INTERRUPT;
ih.unique = req->intr_unique;
ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
arg.unique = req->in.h.unique;
spin_unlock(&fiq->waitq.lock);
......@@ -1238,6 +1287,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
struct fuse_req *req;
struct fuse_in *in;
unsigned reqsize;
unsigned int hash;
restart:
spin_lock(&fiq->waitq.lock);
......@@ -1310,13 +1360,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
err = reqsize;
goto out_end;
}
list_move_tail(&req->list, &fpq->processing);
spin_unlock(&fpq->lock);
hash = fuse_req_hash(req->in.h.unique);
list_move_tail(&req->list, &fpq->processing[hash]);
__fuse_get_request(req);
set_bit(FR_SENT, &req->flags);
spin_unlock(&fpq->lock);
/* matches barrier in request_wait_answer() */
smp_mb__after_atomic();
if (test_bit(FR_INTERRUPTED, &req->flags))
queue_interrupt(fiq, req);
fuse_put_request(fc, req);
return reqsize;
......@@ -1663,7 +1716,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
int num_pages;
unsigned int num_pages;
offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode);
......@@ -1675,7 +1728,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
num = file_size - outarg->offset;
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
num_pages = min(num_pages, fc->max_pages);
req = fuse_get_req(fc, num_pages);
if (IS_ERR(req))
......@@ -1792,10 +1845,11 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
{
unsigned int hash = fuse_req_hash(unique);
struct fuse_req *req;
list_for_each_entry(req, &fpq->processing, list) {
if (req->in.h.unique == unique || req->intr_unique == unique)
list_for_each_entry(req, &fpq->processing[hash], list) {
if (req->in.h.unique == unique)
return req;
}
return NULL;
......@@ -1869,22 +1923,26 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
if (!fpq->connected)
goto err_unlock_pq;
req = request_find(fpq, oh.unique);
req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
if (!req)
goto err_unlock_pq;
/* Is it an interrupt reply? */
if (req->intr_unique == oh.unique) {
/* Is it an interrupt reply ID? */
if (oh.unique & FUSE_INT_REQ_BIT) {
__fuse_get_request(req);
spin_unlock(&fpq->lock);
err = -EINVAL;
if (nbytes != sizeof(struct fuse_out_header))
if (nbytes != sizeof(struct fuse_out_header)) {
fuse_put_request(fc, req);
goto err_finish;
}
if (oh.error == -ENOSYS)
fc->no_interrupt = 1;
else if (oh.error == -EAGAIN)
queue_interrupt(&fc->iq, req);
fuse_put_request(fc, req);
fuse_copy_finish(cs);
return nbytes;
......@@ -2102,9 +2160,13 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
struct fuse_dev *fud;
struct fuse_req *req, *next;
LIST_HEAD(to_end);
unsigned int i;
/* Background queuing checks fc->connected under bg_lock */
spin_lock(&fc->bg_lock);
fc->connected = 0;
fc->blocked = 0;
spin_unlock(&fc->bg_lock);
fc->aborted = is_abort;
fuse_set_initialized(fc);
list_for_each_entry(fud, &fc->devices, entry) {
......@@ -2123,11 +2185,16 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
}
spin_unlock(&req->waitq.lock);
}
list_splice_tail_init(&fpq->processing, &to_end);
for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
list_splice_tail_init(&fpq->processing[i],
&to_end);
spin_unlock(&fpq->lock);
}
spin_lock(&fc->bg_lock);
fc->blocked = 0;
fc->max_background = UINT_MAX;
flush_bg_queue(fc);
spin_unlock(&fc->bg_lock);
spin_lock(&fiq->waitq.lock);
fiq->connected = 0;
......@@ -2163,10 +2230,12 @@ int fuse_dev_release(struct inode *inode, struct file *file)
struct fuse_conn *fc = fud->fc;
struct fuse_pqueue *fpq = &fud->pq;
LIST_HEAD(to_end);
unsigned int i;
spin_lock(&fpq->lock);
WARN_ON(!list_empty(&fpq->io));
list_splice_init(&fpq->processing, &to_end);
for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
list_splice_init(&fpq->processing[i], &to_end);
spin_unlock(&fpq->lock);
end_requests(fc, &to_end);
......
......@@ -14,24 +14,9 @@
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/iversion.h>
#include <linux/posix_acl.h>
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_inode *fi = get_fuse_inode(dir);
if (!fc->do_readdirplus)
return false;
if (!fc->readdirplus_auto)
return true;
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
return true;
if (ctx->pos == 0)
return true;
return false;
}
static void fuse_advise_use_readdirplus(struct inode *dir)
{
struct fuse_inode *fi = get_fuse_inode(dir);
......@@ -80,8 +65,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec)
* Set dentry and possibly attribute timeouts from the lookup/mk*
* replies
*/
static void fuse_change_entry_timeout(struct dentry *entry,
struct fuse_entry_out *o)
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
{
fuse_dentry_settime(entry,
time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
......@@ -92,18 +76,29 @@ static u64 attr_timeout(struct fuse_attr_out *o)
return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
}
static u64 entry_attr_timeout(struct fuse_entry_out *o)
u64 entry_attr_timeout(struct fuse_entry_out *o)
{
return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
}
static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
set_mask_bits(&get_fuse_inode(inode)->inval_mask, 0, mask);
}
/*
* Mark the attributes as stale, so that at the next call to
* ->getattr() they will be fetched from userspace
*/
void fuse_invalidate_attr(struct inode *inode)
{
get_fuse_inode(inode)->i_time = 0;
fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
}
static void fuse_dir_changed(struct inode *dir)
{
fuse_invalidate_attr(dir);
inode_maybe_inc_iversion(dir, false);
}
/**
......@@ -113,7 +108,7 @@ void fuse_invalidate_attr(struct inode *inode)
void fuse_invalidate_atime(struct inode *inode)
{
if (!IS_RDONLY(inode))
fuse_invalidate_attr(inode);
fuse_invalidate_attr_mask(inode, STATX_ATIME);
}
/*
......@@ -262,11 +257,6 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto out;
}
static int invalid_nodeid(u64 nodeid)
{
return !nodeid || nodeid == FUSE_ROOT_ID;
}
static int fuse_dentry_init(struct dentry *dentry)
{
dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
......@@ -469,7 +459,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
kfree(forget);
d_instantiate(entry, inode);
fuse_change_entry_timeout(entry, &outentry);
fuse_invalidate_attr(dir);
fuse_dir_changed(dir);
err = finish_open(file, entry, generic_file_open);
if (err) {
fuse_sync_release(ff, flags);
......@@ -583,7 +573,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
} else {
fuse_change_entry_timeout(entry, &outarg);
}
fuse_invalidate_attr(dir);
fuse_dir_changed(dir);
return 0;
out_put_forget_req:
......@@ -693,7 +683,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
drop_nlink(inode);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
fuse_invalidate_attr(dir);
fuse_dir_changed(dir);
fuse_invalidate_entry_cache(entry);
fuse_update_ctime(inode);
} else if (err == -EINTR)
......@@ -715,7 +705,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
err = fuse_simple_request(fc, &args);
if (!err) {
clear_nlink(d_inode(entry));
fuse_invalidate_attr(dir);
fuse_dir_changed(dir);
fuse_invalidate_entry_cache(entry);
} else if (err == -EINTR)
fuse_invalidate_entry(entry);
......@@ -754,9 +744,9 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
fuse_update_ctime(d_inode(newent));
}
fuse_invalidate_attr(olddir);
fuse_dir_changed(olddir);
if (olddir != newdir)
fuse_invalidate_attr(newdir);
fuse_dir_changed(newdir);
/* newent will end up negative */
if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
......@@ -932,7 +922,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
}
static int fuse_update_get_attr(struct inode *inode, struct file *file,
struct kstat *stat, unsigned int flags)
struct kstat *stat, u32 request_mask,
unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
int err = 0;
......@@ -942,6 +933,8 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
sync = true;
else if (flags & AT_STATX_DONT_SYNC)
sync = false;
else if (request_mask & READ_ONCE(fi->inval_mask))
sync = true;
else
sync = time_before64(fi->i_time, get_jiffies_64());
......@@ -959,7 +952,9 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
int fuse_update_attributes(struct inode *inode, struct file *file)
{
return fuse_update_get_attr(inode, file, NULL, 0);
/* Do *not* need to get atime for internal purposes */
return fuse_update_get_attr(inode, file, NULL,
STATX_BASIC_STATS & ~STATX_ATIME, 0);
}
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
......@@ -989,7 +984,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
if (!entry)
goto unlock;
fuse_invalidate_attr(parent);
fuse_dir_changed(parent);
fuse_invalidate_entry(entry);
if (child_nodeid != 0 && d_really_is_positive(entry)) {
......@@ -1165,271 +1160,78 @@ static int fuse_permission(struct inode *inode, int mask)
return err;
}
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx)
{
while (nbytes >= FUSE_NAME_OFFSET) {
struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type))
break;
buf += reclen;
nbytes -= reclen;
ctx->pos = dirent->off;
}
return 0;
}
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
u64 attr_version)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
struct dentry *parent = file->f_path.dentry;
struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
* Unlike in the case of fuse_lookup, zero nodeid does not mean
* ENOENT. Instead, it only means the userspace filesystem did
* not want to return attributes/handle for this entry.
*
* So do nothing.
*/
return 0;
}
if (name.name[0] == '.') {
/*
* We could potentially refresh the attributes of the directory
* and its parent?
*/
if (name.len == 1)
return 0;
if (name.name[1] == '.' && name.len == 2)
return 0;
}
if (invalid_nodeid(o->nodeid))
return -EIO;
if (!fuse_valid_type(o->attr.mode))
return -EIO;
fc = get_fuse_conn(dir);
name.hash = full_name_hash(parent, name.name, name.len);
dentry = d_lookup(parent, &name);
if (!dentry) {
retry:
dentry = d_alloc_parallel(parent, &name, &wq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (!d_in_lookup(dentry)) {
struct fuse_inode *fi;
inode = d_inode(dentry);
if (!inode ||
get_node_id(inode) != o->nodeid ||
((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
d_invalidate(dentry);
dput(dentry);
goto retry;
}
if (is_bad_inode(inode)) {
dput(dentry);
return -EIO;
}
fi = get_fuse_inode(inode);
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
forget_all_cached_acls(inode);
fuse_change_attributes(inode, &o->attr,
entry_attr_timeout(o),
attr_version);
/*
* The other branch comes via fuse_iget()
* which bumps nlookup inside
*/
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, entry_attr_timeout(o),
attr_version);
if (!inode)
inode = ERR_PTR(-ENOMEM);
alias = d_splice_alias(inode, dentry);
d_lookup_done(dentry);
if (alias) {
dput(dentry);
dentry = alias;
}
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
fuse_change_entry_timeout(dentry, o);
dput(dentry);
return 0;
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx, u64 attr_version)
static int fuse_readlink_page(struct inode *inode, struct page *page)
{
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
size_t reclen;
int over = 0;
int ret;
while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
direntplus = (struct fuse_direntplus *) buf;
dirent = &direntplus->dirent;
reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!over) {
/* We fill entries into dstbuf only as much as
it can hold. But we still continue iterating
over remaining entries to link them. If not,
we need to send a FORGET for each of those
which we did not link.
*/
over = !dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type);
if (!over)
ctx->pos = dirent->off;
}
buf += reclen;
nbytes -= reclen;
ret = fuse_direntplus_link(file, direntplus, attr_version);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
return 0;
}
static int fuse_readdir(struct file *file, struct dir_context *ctx)
{
int plus, err;
size_t nbytes;
struct page *page;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
u64 attr_version = 0;
bool locked;
if (is_bad_inode(inode))
return -EIO;
int err;
req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
page = alloc_page(GFP_KERNEL);
if (!page) {
fuse_put_request(fc, req);
return -ENOMEM;
}
plus = fuse_use_readdirplus(inode, ctx);
req->out.page_zeroing = 1;
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE;
if (plus) {
attr_version = fuse_get_attr_version(fc);
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
req->page_descs[0].length = PAGE_SIZE - 1;
req->in.h.opcode = FUSE_READLINK;
req->in.h.nodeid = get_node_id(inode);
req->out.argvar = 1;
req->out.numargs = 1;
req->out.args[0].size = PAGE_SIZE - 1;
fuse_request_send(fc, req);
fuse_unlock_inode(inode, locked);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) {
if (plus) {
err = parse_dirplusfile(page_address(page), nbytes,
file, ctx,
attr_version);
} else {
err = parse_dirfile(page_address(page), nbytes, file,
ctx);
}
char *link = page_address(page);
size_t len = req->out.args[0].size;
BUG_ON(len >= PAGE_SIZE);
link[len] = '\0';
}
__free_page(page);
fuse_put_request(fc, req);
fuse_invalidate_atime(inode);
return err;
}
static const char *fuse_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
struct fuse_conn *fc = get_fuse_conn(inode);
FUSE_ARGS(args);
char *link;
ssize_t ret;
struct page *page;
int err;
err = -EIO;
if (is_bad_inode(inode))
goto out_err;
if (fc->cache_symlinks)
return page_get_link(dentry, inode, callback);
err = -ECHILD;
if (!dentry)
return ERR_PTR(-ECHILD);
goto out_err;
link = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!link)
return ERR_PTR(-ENOMEM);
page = alloc_page(GFP_KERNEL);
err = -ENOMEM;
if (!page)
goto out_err;
args.in.h.opcode = FUSE_READLINK;
args.in.h.nodeid = get_node_id(inode);
args.out.argvar = 1;
args.out.numargs = 1;
args.out.args[0].size = PAGE_SIZE - 1;
args.out.args[0].value = link;
ret = fuse_simple_request(fc, &args);
if (ret < 0) {
kfree(link);
link = ERR_PTR(ret);
} else {
link[ret] = '\0';
set_delayed_call(done, kfree_link, link);
err = fuse_readlink_page(inode, page);
if (err) {
__free_page(page);
goto out_err;
}
fuse_invalidate_atime(inode);
return link;
set_delayed_call(callback, page_put_link, page);
return page_address(page);
out_err:
return ERR_PTR(err);
}
static int fuse_dir_open(struct inode *inode, struct file *file)
......@@ -1662,8 +1464,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
file = NULL;
}
if (attr->ia_valid & ATTR_SIZE)
if (attr->ia_valid & ATTR_SIZE) {
if (WARN_ON(!S_ISREG(inode->i_mode)))
return -EIO;
is_truncate = true;
}
if (is_truncate) {
fuse_set_nowrite(inode);
......@@ -1811,7 +1616,7 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
if (!fuse_allow_current_process(fc))
return -EACCES;
return fuse_update_get_attr(inode, NULL, stat, flags);
return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
}
static const struct inode_operations fuse_dir_inode_operations = {
......@@ -1867,11 +1672,37 @@ void fuse_init_common(struct inode *inode)
void fuse_init_dir(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
inode->i_op = &fuse_dir_inode_operations;
inode->i_fop = &fuse_dir_operations;
spin_lock_init(&fi->rdc.lock);
fi->rdc.cached = false;
fi->rdc.size = 0;
fi->rdc.pos = 0;
fi->rdc.version = 0;
}
static int fuse_symlink_readpage(struct file *null, struct page *page)
{
int err = fuse_readlink_page(page->mapping->host, page);
if (!err)
SetPageUptodate(page);
unlock_page(page);
return err;
}
static const struct address_space_operations fuse_symlink_aops = {
.readpage = fuse_symlink_readpage,
};
void fuse_init_symlink(struct inode *inode)
{
inode->i_op = &fuse_symlink_inode_operations;
inode->i_data.a_ops = &fuse_symlink_aops;
inode_nohighmem(inode);
}
......@@ -59,6 +59,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
}
INIT_LIST_HEAD(&ff->write_entry);
mutex_init(&ff->readdir.lock);
refcount_set(&ff->count, 1);
RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait);
......@@ -73,6 +74,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
void fuse_file_free(struct fuse_file *ff)
{
fuse_request_free(ff->reserved_req);
mutex_destroy(&ff->readdir.lock);
kfree(ff);
}
......@@ -848,11 +850,11 @@ static int fuse_readpages_fill(void *_data, struct page *page)
fuse_wait_on_page_writeback(inode, page->index);
if (req->num_pages &&
(req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
int nr_alloc = min_t(unsigned, data->nr_pages,
FUSE_MAX_PAGES_PER_REQ);
unsigned int nr_alloc = min_t(unsigned int, data->nr_pages,
fc->max_pages);
fuse_send_readpages(req, data->file);
if (fc->async_read)
req = fuse_get_req_for_background(fc, nr_alloc);
......@@ -887,7 +889,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_data data;
int err;
int nr_alloc = min_t(unsigned, nr_pages, FUSE_MAX_PAGES_PER_REQ);
unsigned int nr_alloc = min_t(unsigned int, nr_pages, fc->max_pages);
err = -EIO;
if (is_bad_inode(inode))
......@@ -1102,12 +1104,13 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
return count > 0 ? count : err;
}
static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
unsigned int max_pages)
{
return min_t(unsigned,
return min_t(unsigned int,
((pos + len - 1) >> PAGE_SHIFT) -
(pos >> PAGE_SHIFT) + 1,
FUSE_MAX_PAGES_PER_REQ);
max_pages);
}
static ssize_t fuse_perform_write(struct kiocb *iocb,
......@@ -1129,7 +1132,8 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
do {
struct fuse_req *req;
ssize_t count;
unsigned nr_pages = fuse_wr_pages(pos, iov_iter_count(ii));
unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii),
fc->max_pages);
req = fuse_get_req(fc, nr_pages);
if (IS_ERR(req)) {
......@@ -1319,11 +1323,6 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
return ret < 0 ? ret : 0;
}
static inline int fuse_iter_npages(const struct iov_iter *ii_p)
{
return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
}
ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
loff_t *ppos, int flags)
{
......@@ -1343,9 +1342,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
int err = 0;
if (io->async)
req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
req = fuse_get_req_for_background(fc, iov_iter_npages(iter,
fc->max_pages));
else
req = fuse_get_req(fc, fuse_iter_npages(iter));
req = fuse_get_req(fc, iov_iter_npages(iter, fc->max_pages));
if (IS_ERR(req))
return PTR_ERR(req);
......@@ -1390,9 +1390,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
fuse_put_request(fc, req);
if (io->async)
req = fuse_get_req_for_background(fc,
fuse_iter_npages(iter));
iov_iter_npages(iter, fc->max_pages));
else
req = fuse_get_req(fc, fuse_iter_npages(iter));
req = fuse_get_req(fc, iov_iter_npages(iter,
fc->max_pages));
if (IS_ERR(req))
break;
}
......@@ -1418,7 +1419,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
res = fuse_direct_io(io, iter, ppos, 0);
fuse_invalidate_attr(inode);
fuse_invalidate_atime(inode);
return res;
}
......@@ -1487,6 +1488,7 @@ __acquires(fc->lock)
struct fuse_inode *fi = get_fuse_inode(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in;
__u64 data_size = req->num_pages * PAGE_SIZE;
bool queued;
if (!fc->connected)
goto out_free;
......@@ -1502,7 +1504,8 @@ __acquires(fc->lock)
req->in.args[1].size = inarg->size;
fi->writectr++;
fuse_request_send_background_locked(fc, req);
queued = fuse_request_queue_background(fc, req);
WARN_ON(!queued);
return;
out_free:
......@@ -1819,12 +1822,18 @@ static int fuse_writepages_fill(struct page *page,
is_writeback = fuse_page_is_writeback(inode, page->index);
if (req && req->num_pages &&
(is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(is_writeback || req->num_pages == fc->max_pages ||
(req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
fuse_writepages_send(data);
data->req = NULL;
} else if (req && req->num_pages == req->max_pages) {
if (!fuse_req_realloc_pages(fc, req, GFP_NOFS)) {
fuse_writepages_send(data);
req = data->req = NULL;
}
}
err = -ENOMEM;
tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!tmp_page)
......@@ -1847,7 +1856,7 @@ static int fuse_writepages_fill(struct page *page,
struct fuse_inode *fi = get_fuse_inode(inode);
err = -ENOMEM;
req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
req = fuse_request_alloc_nofs(FUSE_REQ_INLINE_PAGES);
if (!req) {
__free_page(tmp_page);
goto out_unlock;
......@@ -1904,6 +1913,7 @@ static int fuse_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_fill_wb_data data;
int err;
......@@ -1916,7 +1926,7 @@ static int fuse_writepages(struct address_space *mapping,
data.ff = NULL;
err = -ENOMEM;
data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
data.orig_pages = kcalloc(fc->max_pages,
sizeof(struct page *),
GFP_NOFS);
if (!data.orig_pages)
......@@ -2387,10 +2397,11 @@ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
}
/* Make sure iov_length() won't overflow */
static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
size_t count)
{
size_t n;
u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
u32 max = fc->max_pages << PAGE_SHIFT;
for (n = 0; n < count; n++, iov++) {
if (iov->iov_len > (size_t) max)
......@@ -2514,7 +2525,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
pages = kcalloc(fc->max_pages, sizeof(pages[0]), GFP_KERNEL);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!pages || !iov_page)
goto out;
......@@ -2553,7 +2564,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
/* make sure there are enough buffer pages and init request with them */
err = -ENOMEM;
if (max_pages > FUSE_MAX_PAGES_PER_REQ)
if (max_pages > fc->max_pages)
goto out;
while (num_pages < max_pages) {
pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
......@@ -2640,11 +2651,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iov = iov_page;
out_iov = in_iov + in_iovs;
err = fuse_verify_ioctl_iov(in_iov, in_iovs);
err = fuse_verify_ioctl_iov(fc, in_iov, in_iovs);
if (err)
goto out;
err = fuse_verify_ioctl_iov(out_iov, out_iovs);
err = fuse_verify_ioctl_iov(fc, out_iov, out_iovs);
if (err)
goto out;
......@@ -2835,9 +2846,9 @@ static void fuse_do_truncate(struct file *file)
fuse_do_setattr(file_dentry(file), &attr, file);
}
static inline loff_t fuse_round_up(loff_t off)
static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
{
return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
return round_up(off, fc->max_pages << PAGE_SHIFT);
}
static ssize_t
......@@ -2866,7 +2877,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
if (offset >= i_size)
return 0;
iov_iter_truncate(iter, fuse_round_up(i_size - offset));
iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset));
count = iov_iter_count(iter);
}
......@@ -3011,6 +3022,82 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return err;
}
static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
struct fuse_file *ff_in = file_in->private_data;
struct fuse_file *ff_out = file_out->private_data;
struct inode *inode_out = file_inode(file_out);
struct fuse_inode *fi_out = get_fuse_inode(inode_out);
struct fuse_conn *fc = ff_in->fc;
FUSE_ARGS(args);
struct fuse_copy_file_range_in inarg = {
.fh_in = ff_in->fh,
.off_in = pos_in,
.nodeid_out = ff_out->nodeid,
.fh_out = ff_out->fh,
.off_out = pos_out,
.len = len,
.flags = flags
};
struct fuse_write_out outarg;
ssize_t err;
/* mark unstable when write-back is not used, and file_out gets
* extended */
bool is_unstable = (!fc->writeback_cache) &&
((pos_out + len) > inode_out->i_size);
if (fc->no_copy_file_range)
return -EOPNOTSUPP;
inode_lock(inode_out);
if (fc->writeback_cache) {
err = filemap_write_and_wait_range(inode_out->i_mapping,
pos_out, pos_out + len);
if (err)
goto out;
fuse_sync_writes(inode_out);
}
if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
args.in.h.opcode = FUSE_COPY_FILE_RANGE;
args.in.h.nodeid = ff_in->nodeid;
args.in.numargs = 1;
args.in.args[0].size = sizeof(inarg);
args.in.args[0].value = &inarg;
args.out.numargs = 1;
args.out.args[0].size = sizeof(outarg);
args.out.args[0].value = &outarg;
err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_copy_file_range = 1;
err = -EOPNOTSUPP;
}
if (err)
goto out;
if (fc->writeback_cache) {
fuse_write_update_size(inode_out, pos_out + outarg.size);
file_update_time(file_out);
}
fuse_invalidate_attr(inode_out);
err = outarg.size;
out:
if (is_unstable)
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
inode_unlock(inode_out);
return err;
}
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
......@@ -3027,6 +3114,7 @@ static const struct file_operations fuse_file_operations = {
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
.fallocate = fuse_file_fallocate,
.copy_file_range = fuse_copy_file_range,
};
static const struct file_operations fuse_direct_io_file_operations = {
......@@ -3062,6 +3150,14 @@ static const struct address_space_operations fuse_file_aops = {
void fuse_init_file_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
init_waitqueue_head(&fi->page_waitq);
INIT_LIST_HEAD(&fi->writepages);
}
......@@ -28,8 +28,11 @@
#include <linux/refcount.h>
#include <linux/user_namespace.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
/** Default max number of pages that can be used in a single read request */
#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
/** Maximum of max_pages received in init_out */
#define FUSE_MAX_MAX_PAGES 256
/** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN
......@@ -77,6 +80,9 @@ struct fuse_inode {
/** Time in jiffies until the file attributes are valid */
u64 i_time;
/* Which attributes are invalid */
u32 inval_mask;
/** The sticky bit in inode->i_mode may have been removed, so
preserve the original mode */
umode_t orig_i_mode;
......@@ -87,21 +93,51 @@ struct fuse_inode {
/** Version of last attribute change */
u64 attr_version;
/** Files usable in writepage. Protected by fc->lock */
struct list_head write_files;
union {
/* Write related fields (regular file only) */
struct {
/* Files usable in writepage. Protected by fc->lock */
struct list_head write_files;
/* Writepages pending on truncate or fsync */
struct list_head queued_writes;
/** Writepages pending on truncate or fsync */
struct list_head queued_writes;
/* Number of sent writes, a negative bias
* (FUSE_NOWRITE) means more writes are blocked */
int writectr;
/* Waitq for writepage completion */
wait_queue_head_t page_waitq;
/* List of writepage requestst (pending or sent) */
struct list_head writepages;
};
/* readdir cache (directory only) */
struct {
/* true if fully cached */
bool cached;
/** Number of sent writes, a negative bias (FUSE_NOWRITE)
* means more writes are blocked */
int writectr;
/* size of cache */
loff_t size;
/** Waitq for writepage completion */
wait_queue_head_t page_waitq;
/* position at end of cache (position of next entry) */
loff_t pos;
/** List of writepage requestst (pending or sent) */
struct list_head writepages;
/* version of the cache */
u64 version;
/* modification time of directory when cache was
* started */
struct timespec64 mtime;
/* iversion of directory when cache was started */
u64 iversion;
/* protects above fields */
spinlock_t lock;
} rdc;
};
/** Miscellaneous bits describing inode state */
unsigned long state;
......@@ -148,6 +184,25 @@ struct fuse_file {
/** Entry on inode's write_files list */
struct list_head write_entry;
/* Readdir related */
struct {
/*
* Protects below fields against (crazy) parallel readdir on
* same open file. Uncontended in the normal case.
*/
struct mutex lock;
/* Dir stream position */
loff_t pos;
/* Offset in cache */
loff_t cache_off;
/* Version of cache we are reading */
u64 version;
} readdir;
/** RB node to be linked on fuse_conn->polled_files */
struct rb_node polled_node;
......@@ -311,9 +366,6 @@ struct fuse_req {
/** refcount */
refcount_t count;
/** Unique ID for the interrupt request */
u64 intr_unique;
/* Request flags, updated with test/set/clear_bit() */
unsigned long flags;
......@@ -411,6 +463,9 @@ struct fuse_iqueue {
struct fasync_struct *fasync;
};
#define FUSE_PQ_HASH_BITS 8
#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS)
struct fuse_pqueue {
/** Connection established */
unsigned connected;
......@@ -418,8 +473,8 @@ struct fuse_pqueue {
/** Lock protecting accessess to members of this structure */
spinlock_t lock;
/** The list of requests being processed */
struct list_head processing;
/** Hash table of requests being processed */
struct list_head *processing;
/** The list of requests under I/O */
struct list_head io;
......@@ -476,6 +531,9 @@ struct fuse_conn {
/** Maximum write size */
unsigned max_write;
/** Maxmum number of pages that can be used in a single request */
unsigned int max_pages;
/** Input queue */
struct fuse_iqueue iq;
......@@ -500,6 +558,10 @@ struct fuse_conn {
/** The list of background requests set aside for later queuing */
struct list_head bg_queue;
/** Protects: max_background, congestion_threshold, num_background,
* active_background, bg_queue, blocked */
spinlock_t bg_lock;
/** Flag indicating that INIT reply has been received. Allocating
* any fuse request will be suspended until the flag is set */
int initialized;
......@@ -551,6 +613,9 @@ struct fuse_conn {
/** handle fs handles killing suid/sgid/cap on write/chown/trunc */
unsigned handle_killpriv:1;
/** cache READLINK responses in page cache */
unsigned cache_symlinks:1;
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
......@@ -637,6 +702,9 @@ struct fuse_conn {
/** Allow other than the mounter user to access the filesystem ? */
unsigned allow_other:1;
/** Does the filesystem support copy_file_range? */
unsigned no_copy_file_range:1;
/** The number of requests waiting for completion */
atomic_t num_waiting;
......@@ -697,6 +765,11 @@ static inline u64 get_node_id(struct inode *inode)
return get_fuse_inode(inode)->nodeid;
}
static inline int invalid_nodeid(u64 nodeid)
{
return !nodeid || nodeid == FUSE_ROOT_ID;
}
/** Device operations */
extern const struct file_operations fuse_dev_operations;
......@@ -812,6 +885,10 @@ struct fuse_req *fuse_request_alloc(unsigned npages);
struct fuse_req *fuse_request_alloc_nofs(unsigned npages);
bool fuse_req_realloc_pages(struct fuse_conn *fc, struct fuse_req *req,
gfp_t flags);
/**
* Free a request
*/
......@@ -856,9 +933,7 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
* Send a request in the background
*/
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
void fuse_request_send_background_locked(struct fuse_conn *fc,
struct fuse_req *req);
bool fuse_request_queue_background(struct fuse_conn *fc, struct fuse_req *req);
/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc, bool is_abort);
......@@ -873,6 +948,9 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
void fuse_invalidate_atime(struct inode *inode);
u64 entry_attr_timeout(struct fuse_entry_out *o);
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
/**
* Acquire reference to fuse_conn
*/
......@@ -992,4 +1070,8 @@ struct posix_acl;
struct posix_acl *fuse_get_acl(struct inode *inode, int type);
int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
/* readdir.c */
int fuse_readdir(struct file *file, struct dir_context *ctx);
#endif /* _FS_FUSE_I_H */
......@@ -90,16 +90,12 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi = get_fuse_inode(inode);
fi->i_time = 0;
fi->inval_mask = 0;
fi->nodeid = 0;
fi->nlookup = 0;
fi->attr_version = 0;
fi->writectr = 0;
fi->orig_ino = 0;
fi->state = 0;
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
init_waitqueue_head(&fi->page_waitq);
mutex_init(&fi->mutex);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
......@@ -119,8 +115,10 @@ static void fuse_i_callback(struct rcu_head *head)
static void fuse_destroy_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
BUG_ON(!list_empty(&fi->write_files));
BUG_ON(!list_empty(&fi->queued_writes));
if (S_ISREG(inode->i_mode)) {
WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes));
}
mutex_destroy(&fi->mutex);
kfree(fi->forget);
call_rcu(&inode->i_rcu, fuse_i_callback);
......@@ -167,6 +165,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
fi->attr_version = ++fc->attr_version;
fi->i_time = attr_valid;
WRITE_ONCE(fi->inval_mask, 0);
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
......@@ -594,9 +593,11 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq)
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
memset(fpq, 0, sizeof(struct fuse_pqueue));
unsigned int i;
spin_lock_init(&fpq->lock);
INIT_LIST_HEAD(&fpq->processing);
for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
INIT_LIST_HEAD(&fpq->processing[i]);
INIT_LIST_HEAD(&fpq->io);
fpq->connected = 1;
}
......@@ -605,6 +606,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
{
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
spin_lock_init(&fc->bg_lock);
init_rwsem(&fc->killsb);
refcount_set(&fc->count, 1);
atomic_set(&fc->dev_count, 1);
......@@ -852,6 +854,7 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
sanitize_global_limit(&max_user_bgreq);
sanitize_global_limit(&max_user_congthresh);
spin_lock(&fc->bg_lock);
if (arg->max_background) {
fc->max_background = arg->max_background;
......@@ -865,6 +868,7 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
fc->congestion_threshold > max_user_congthresh)
fc->congestion_threshold = max_user_congthresh;
}
spin_unlock(&fc->bg_lock);
}
static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
......@@ -924,8 +928,15 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->posix_acl = 1;
fc->sb->s_xattr = fuse_acl_xattr_handlers;
}
if (arg->flags & FUSE_CACHE_SYMLINKS)
fc->cache_symlinks = 1;
if (arg->flags & FUSE_ABORT_ERROR)
fc->abort_err = 1;
if (arg->flags & FUSE_MAX_PAGES) {
fc->max_pages =
min_t(unsigned int, FUSE_MAX_MAX_PAGES,
max_t(unsigned int, arg->max_pages, 1));
}
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
......@@ -957,7 +968,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR;
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
......@@ -1022,17 +1033,26 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
{
struct fuse_dev *fud;
struct list_head *pq;
fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
if (fud) {
fud->fc = fuse_conn_get(fc);
fuse_pqueue_init(&fud->pq);
if (!fud)
return NULL;
spin_lock(&fc->lock);
list_add_tail(&fud->entry, &fc->devices);
spin_unlock(&fc->lock);
pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
if (!pq) {
kfree(fud);
return NULL;
}
fud->pq.processing = pq;
fud->fc = fuse_conn_get(fc);
fuse_pqueue_init(&fud->pq);
spin_lock(&fc->lock);
list_add_tail(&fud->entry, &fc->devices);
spin_unlock(&fc->lock);
return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);
......@@ -1141,6 +1161,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->user_id = d.user_id;
fc->group_id = d.group_id;
fc->max_read = max_t(unsigned, 4096, d.max_read);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
/* Used by get_root_inode() */
sb->s_fs_info = fc;
......
/*
FUSE: Filesystem in Userspace
Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
*/
#include "fuse_i.h"
#include <linux/iversion.h>
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
struct fuse_conn *fc = get_fuse_conn(dir);
struct fuse_inode *fi = get_fuse_inode(dir);
if (!fc->do_readdirplus)
return false;
if (!fc->readdirplus_auto)
return true;
if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
return true;
if (ctx->pos == 0)
return true;
return false;
}
static void fuse_add_dirent_to_cache(struct file *file,
struct fuse_dirent *dirent, loff_t pos)
{
struct fuse_inode *fi = get_fuse_inode(file_inode(file));
size_t reclen = FUSE_DIRENT_SIZE(dirent);
pgoff_t index;
struct page *page;
loff_t size;
u64 version;
unsigned int offset;
void *addr;
spin_lock(&fi->rdc.lock);
/*
* Is cache already completed? Or this entry does not go at the end of
* cache?
*/
if (fi->rdc.cached || pos != fi->rdc.pos) {
spin_unlock(&fi->rdc.lock);
return;
}
version = fi->rdc.version;
size = fi->rdc.size;
offset = size & ~PAGE_MASK;
index = size >> PAGE_SHIFT;
/* Dirent doesn't fit in current page? Jump to next page. */
if (offset + reclen > PAGE_SIZE) {
index++;
offset = 0;
}
spin_unlock(&fi->rdc.lock);
if (offset) {
page = find_lock_page(file->f_mapping, index);
} else {
page = find_or_create_page(file->f_mapping, index,
mapping_gfp_mask(file->f_mapping));
}
if (!page)
return;
spin_lock(&fi->rdc.lock);
/* Raced with another readdir */
if (fi->rdc.version != version || fi->rdc.size != size ||
WARN_ON(fi->rdc.pos != pos))
goto unlock;
addr = kmap_atomic(page);
if (!offset)
clear_page(addr);
memcpy(addr + offset, dirent, reclen);
kunmap_atomic(addr);
fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
fi->rdc.pos = dirent->off;
unlock:
spin_unlock(&fi->rdc.lock);
unlock_page(page);
put_page(page);
}
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
struct fuse_inode *fi = get_fuse_inode(file_inode(file));
loff_t end;
spin_lock(&fi->rdc.lock);
/* does cache end position match current position? */
if (fi->rdc.pos != pos) {
spin_unlock(&fi->rdc.lock);
return;
}
fi->rdc.cached = true;
end = ALIGN(fi->rdc.size, PAGE_SIZE);
spin_unlock(&fi->rdc.lock);
/* truncate unused tail of cache */
truncate_inode_pages(file->f_mapping, end);
}
static bool fuse_emit(struct file *file, struct dir_context *ctx,
struct fuse_dirent *dirent)
{
struct fuse_file *ff = file->private_data;
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_add_dirent_to_cache(file, dirent, ctx->pos);
return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
dirent->type);
}
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx)
{
while (nbytes >= FUSE_NAME_OFFSET) {
struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!fuse_emit(file, ctx, dirent))
break;
buf += reclen;
nbytes -= reclen;
ctx->pos = dirent->off;
}
return 0;
}
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
u64 attr_version)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
struct dentry *parent = file->f_path.dentry;
struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
* Unlike in the case of fuse_lookup, zero nodeid does not mean
* ENOENT. Instead, it only means the userspace filesystem did
* not want to return attributes/handle for this entry.
*
* So do nothing.
*/
return 0;
}
if (name.name[0] == '.') {
/*
* We could potentially refresh the attributes of the directory
* and its parent?
*/
if (name.len == 1)
return 0;
if (name.name[1] == '.' && name.len == 2)
return 0;
}
if (invalid_nodeid(o->nodeid))
return -EIO;
if (!fuse_valid_type(o->attr.mode))
return -EIO;
fc = get_fuse_conn(dir);
name.hash = full_name_hash(parent, name.name, name.len);
dentry = d_lookup(parent, &name);
if (!dentry) {
retry:
dentry = d_alloc_parallel(parent, &name, &wq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (!d_in_lookup(dentry)) {
struct fuse_inode *fi;
inode = d_inode(dentry);
if (!inode ||
get_node_id(inode) != o->nodeid ||
((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
d_invalidate(dentry);
dput(dentry);
goto retry;
}
if (is_bad_inode(inode)) {
dput(dentry);
return -EIO;
}
fi = get_fuse_inode(inode);
spin_lock(&fc->lock);
fi->nlookup++;
spin_unlock(&fc->lock);
forget_all_cached_acls(inode);
fuse_change_attributes(inode, &o->attr,
entry_attr_timeout(o),
attr_version);
/*
* The other branch comes via fuse_iget()
* which bumps nlookup inside
*/
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, entry_attr_timeout(o),
attr_version);
if (!inode)
inode = ERR_PTR(-ENOMEM);
alias = d_splice_alias(inode, dentry);
d_lookup_done(dentry);
if (alias) {
dput(dentry);
dentry = alias;
}
if (IS_ERR(dentry))
return PTR_ERR(dentry);
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
fuse_change_entry_timeout(dentry, o);
dput(dentry);
return 0;
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx, u64 attr_version)
{
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
size_t reclen;
int over = 0;
int ret;
while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
direntplus = (struct fuse_direntplus *) buf;
dirent = &direntplus->dirent;
reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
return -EIO;
if (reclen > nbytes)
break;
if (memchr(dirent->name, '/', dirent->namelen) != NULL)
return -EIO;
if (!over) {
/* We fill entries into dstbuf only as much as
it can hold. But we still continue iterating
over remaining entries to link them. If not,
we need to send a FORGET for each of those
which we did not link.
*/
over = !fuse_emit(file, ctx, dirent);
if (!over)
ctx->pos = dirent->off;
}
buf += reclen;
nbytes -= reclen;
ret = fuse_direntplus_link(file, direntplus, attr_version);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
return 0;
}
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus, err;
size_t nbytes;
struct page *page;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
u64 attr_version = 0;
bool locked;
req = fuse_get_req(fc, 1);
if (IS_ERR(req))
return PTR_ERR(req);
page = alloc_page(GFP_KERNEL);
if (!page) {
fuse_put_request(fc, req);
return -ENOMEM;
}
plus = fuse_use_readdirplus(inode, ctx);
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
req->page_descs[0].length = PAGE_SIZE;
if (plus) {
attr_version = fuse_get_attr_version(fc);
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
fuse_request_send(fc, req);
fuse_unlock_inode(inode, locked);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err) {
if (!nbytes) {
struct fuse_file *ff = file->private_data;
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
err = parse_dirplusfile(page_address(page), nbytes,
file, ctx, attr_version);
} else {
err = parse_dirfile(page_address(page), nbytes, file,
ctx);
}
}
__free_page(page);
fuse_invalidate_atime(inode);
return err;
}
enum fuse_parse_result {
FOUND_ERR = -1,
FOUND_NONE = 0,
FOUND_SOME,
FOUND_ALL,
};
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
void *addr, unsigned int size,
struct dir_context *ctx)
{
unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
enum fuse_parse_result res = FOUND_NONE;
WARN_ON(offset >= size);
for (;;) {
struct fuse_dirent *dirent = addr + offset;
unsigned int nbytes = size - offset;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
break;
if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
return FOUND_ERR;
if (WARN_ON(reclen > nbytes))
return FOUND_ERR;
if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
return FOUND_ERR;
if (ff->readdir.pos == ctx->pos) {
res = FOUND_SOME;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type))
return FOUND_ALL;
ctx->pos = dirent->off;
}
ff->readdir.pos = dirent->off;
ff->readdir.cache_off += reclen;
offset += reclen;
}
return res;
}
static void fuse_rdc_reset(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
fi->rdc.cached = false;
fi->rdc.version++;
fi->rdc.size = 0;
fi->rdc.pos = 0;
}
#define UNCACHED 1
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
enum fuse_parse_result res;
pgoff_t index;
unsigned int size;
struct page *page;
void *addr;
/* Seeked? If so, reset the cache stream */
if (ff->readdir.pos != ctx->pos) {
ff->readdir.pos = 0;
ff->readdir.cache_off = 0;
}
/*
* We're just about to start reading into the cache or reading the
* cache; both cases require an up-to-date mtime value.
*/
if (!ctx->pos && fc->auto_inval_data) {
int err = fuse_update_attributes(inode, file);
if (err)
return err;
}
retry:
spin_lock(&fi->rdc.lock);
retry_locked:
if (!fi->rdc.cached) {
/* Starting cache? Set cache mtime. */
if (!ctx->pos && !fi->rdc.size) {
fi->rdc.mtime = inode->i_mtime;
fi->rdc.iversion = inode_query_iversion(inode);
}
spin_unlock(&fi->rdc.lock);
return UNCACHED;
}
/*
* When at the beginning of the directory (i.e. just after opendir(3) or
* rewinddir(3)), then need to check whether directory contents have
* changed, and reset the cache if so.
*/
if (!ctx->pos) {
if (inode_peek_iversion(inode) != fi->rdc.iversion ||
!timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
fuse_rdc_reset(inode);
goto retry_locked;
}
}
/*
* If cache version changed since the last getdents() call, then reset
* the cache stream.
*/
if (ff->readdir.version != fi->rdc.version) {
ff->readdir.pos = 0;
ff->readdir.cache_off = 0;
}
/*
* If at the beginning of the cache, than reset version to
* current.
*/
if (ff->readdir.pos == 0)
ff->readdir.version = fi->rdc.version;
WARN_ON(fi->rdc.size < ff->readdir.cache_off);
index = ff->readdir.cache_off >> PAGE_SHIFT;
if (index == (fi->rdc.size >> PAGE_SHIFT))
size = fi->rdc.size & ~PAGE_MASK;
else
size = PAGE_SIZE;
spin_unlock(&fi->rdc.lock);
/* EOF? */
if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
return 0;
page = find_get_page_flags(file->f_mapping, index,
FGP_ACCESSED | FGP_LOCK);
spin_lock(&fi->rdc.lock);
if (!page) {
/*
* Uh-oh: page gone missing, cache is useless
*/
if (fi->rdc.version == ff->readdir.version)
fuse_rdc_reset(inode);
goto retry_locked;
}
/* Make sure it's still the same version after getting the page. */
if (ff->readdir.version != fi->rdc.version) {
spin_unlock(&fi->rdc.lock);
unlock_page(page);
put_page(page);
goto retry;
}
spin_unlock(&fi->rdc.lock);
/*
* Contents of the page are now protected against changing by holding
* the page lock.
*/
addr = kmap(page);
res = fuse_parse_cache(ff, addr, size, ctx);
kunmap(page);
unlock_page(page);
put_page(page);
if (res == FOUND_ERR)
return -EIO;
if (res == FOUND_ALL)
return 0;
if (size == PAGE_SIZE) {
/* We hit end of page: skip to next page. */
ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
goto retry;
}
/*
* End of cache reached. If found position, then we are done, otherwise
* need to fall back to uncached, since the position we were looking for
* wasn't in the cache.
*/
return res == FOUND_SOME ? 0 : UNCACHED;
}
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
int err;
if (is_bad_inode(inode))
return -EIO;
mutex_lock(&ff->readdir.lock);
err = UNCACHED;
if (ff->open_flags & FOPEN_CACHE_DIR)
err = fuse_readdir_cached(file, ctx);
if (err == UNCACHED)
err = fuse_readdir_uncached(file, ctx);
mutex_unlock(&ff->readdir.lock);
return err;
}
......@@ -236,33 +236,33 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
#ifdef __KERNEL__
#ifndef set_mask_bits
#define set_mask_bits(ptr, _mask, _bits) \
#define set_mask_bits(ptr, mask, bits) \
({ \
const typeof(*ptr) mask = (_mask), bits = (_bits); \
typeof(*ptr) old, new; \
const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \
typeof(*(ptr)) old__, new__; \
\
do { \
old = READ_ONCE(*ptr); \
new = (old & ~mask) | bits; \
} while (cmpxchg(ptr, old, new) != old); \
old__ = READ_ONCE(*(ptr)); \
new__ = (old__ & ~mask__) | bits__; \
} while (cmpxchg(ptr, old__, new__) != old__); \
\
new; \
new__; \
})
#endif
#ifndef bit_clear_unless
#define bit_clear_unless(ptr, _clear, _test) \
#define bit_clear_unless(ptr, clear, test) \
({ \
const typeof(*ptr) clear = (_clear), test = (_test); \
typeof(*ptr) old, new; \
const typeof(*(ptr)) clear__ = (clear), test__ = (test);\
typeof(*(ptr)) old__, new__; \
\
do { \
old = READ_ONCE(*ptr); \
new = old & ~clear; \
} while (!(old & test) && \
cmpxchg(ptr, old, new) != old); \
old__ = READ_ONCE(*(ptr)); \
new__ = old__ & ~clear__; \
} while (!(old__ & test__) && \
cmpxchg(ptr, old__, new__) != old__); \
\
!(old & test); \
!(old__ & test__); \
})
#endif
......
......@@ -116,6 +116,12 @@
*
* 7.27
* - add FUSE_ABORT_ERROR
*
* 7.28
* - add FUSE_COPY_FILE_RANGE
* - add FOPEN_CACHE_DIR
* - add FUSE_MAX_PAGES, add max_pages to init_out
* - add FUSE_CACHE_SYMLINKS
*/
#ifndef _LINUX_FUSE_H
......@@ -151,7 +157,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 27
#define FUSE_KERNEL_MINOR_VERSION 28
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
......@@ -219,10 +225,12 @@ struct fuse_file_lock {
* FOPEN_DIRECT_IO: bypass page cache for this open file
* FOPEN_KEEP_CACHE: don't invalidate the data cache on open
* FOPEN_NONSEEKABLE: the file is not seekable
* FOPEN_CACHE_DIR: allow caching this directory
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
#define FOPEN_NONSEEKABLE (1 << 2)
#define FOPEN_CACHE_DIR (1 << 3)
/**
* INIT request/reply flags
......@@ -249,6 +257,8 @@ struct fuse_file_lock {
* FUSE_HANDLE_KILLPRIV: fs handles killing suid/sgid/cap on write/chown/trunc
* FUSE_POSIX_ACL: filesystem supports posix acls
* FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
* FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
* FUSE_CACHE_SYMLINKS: cache READLINK responses
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
......@@ -272,6 +282,8 @@ struct fuse_file_lock {
#define FUSE_HANDLE_KILLPRIV (1 << 19)
#define FUSE_POSIX_ACL (1 << 20)
#define FUSE_ABORT_ERROR (1 << 21)
#define FUSE_MAX_PAGES (1 << 22)
#define FUSE_CACHE_SYMLINKS (1 << 23)
/**
* CUSE INIT request/reply flags
......@@ -337,53 +349,54 @@ struct fuse_file_lock {
#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
enum fuse_opcode {
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
FUSE_GETATTR = 3,
FUSE_SETATTR = 4,
FUSE_READLINK = 5,
FUSE_SYMLINK = 6,
FUSE_MKNOD = 8,
FUSE_MKDIR = 9,
FUSE_UNLINK = 10,
FUSE_RMDIR = 11,
FUSE_RENAME = 12,
FUSE_LINK = 13,
FUSE_OPEN = 14,
FUSE_READ = 15,
FUSE_WRITE = 16,
FUSE_STATFS = 17,
FUSE_RELEASE = 18,
FUSE_FSYNC = 20,
FUSE_SETXATTR = 21,
FUSE_GETXATTR = 22,
FUSE_LISTXATTR = 23,
FUSE_REMOVEXATTR = 24,
FUSE_FLUSH = 25,
FUSE_INIT = 26,
FUSE_OPENDIR = 27,
FUSE_READDIR = 28,
FUSE_RELEASEDIR = 29,
FUSE_FSYNCDIR = 30,
FUSE_GETLK = 31,
FUSE_SETLK = 32,
FUSE_SETLKW = 33,
FUSE_ACCESS = 34,
FUSE_CREATE = 35,
FUSE_INTERRUPT = 36,
FUSE_BMAP = 37,
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
FUSE_NOTIFY_REPLY = 41,
FUSE_BATCH_FORGET = 42,
FUSE_FALLOCATE = 43,
FUSE_READDIRPLUS = 44,
FUSE_RENAME2 = 45,
FUSE_LSEEK = 46,
FUSE_LOOKUP = 1,
FUSE_FORGET = 2, /* no reply */
FUSE_GETATTR = 3,
FUSE_SETATTR = 4,
FUSE_READLINK = 5,
FUSE_SYMLINK = 6,
FUSE_MKNOD = 8,
FUSE_MKDIR = 9,
FUSE_UNLINK = 10,
FUSE_RMDIR = 11,
FUSE_RENAME = 12,
FUSE_LINK = 13,
FUSE_OPEN = 14,
FUSE_READ = 15,
FUSE_WRITE = 16,
FUSE_STATFS = 17,
FUSE_RELEASE = 18,
FUSE_FSYNC = 20,
FUSE_SETXATTR = 21,
FUSE_GETXATTR = 22,
FUSE_LISTXATTR = 23,
FUSE_REMOVEXATTR = 24,
FUSE_FLUSH = 25,
FUSE_INIT = 26,
FUSE_OPENDIR = 27,
FUSE_READDIR = 28,
FUSE_RELEASEDIR = 29,
FUSE_FSYNCDIR = 30,
FUSE_GETLK = 31,
FUSE_SETLK = 32,
FUSE_SETLKW = 33,
FUSE_ACCESS = 34,
FUSE_CREATE = 35,
FUSE_INTERRUPT = 36,
FUSE_BMAP = 37,
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
FUSE_NOTIFY_REPLY = 41,
FUSE_BATCH_FORGET = 42,
FUSE_FALLOCATE = 43,
FUSE_READDIRPLUS = 44,
FUSE_RENAME2 = 45,
FUSE_LSEEK = 46,
FUSE_COPY_FILE_RANGE = 47,
/* CUSE specific operations */
CUSE_INIT = 4096,
CUSE_INIT = 4096,
};
enum fuse_notify_code {
......@@ -610,7 +623,9 @@ struct fuse_init_out {
uint16_t congestion_threshold;
uint32_t max_write;
uint32_t time_gran;
uint32_t unused[9];
uint16_t max_pages;
uint16_t padding;
uint32_t unused[8];
};
#define CUSE_INIT_INFO_MAX 4096
......@@ -792,4 +807,14 @@ struct fuse_lseek_out {
uint64_t offset;
};
struct fuse_copy_file_range_in {
uint64_t fh_in;
uint64_t off_in;
uint64_t nodeid_out;
uint64_t fh_out;
uint64_t off_out;
uint64_t len;
uint64_t flags;
};
#endif /* _LINUX_FUSE_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment