Commit d1e9a63d authored by Linus Torvalds

Merge tag 'vfs-6.11-rc1.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:
 "VFS:

   - The new 64-bit mount ids start after the old mount ids, i.e., at
     the first non-32-bit value. However, we started counting one id
     too late and thus lost 4294967296 as the first valid id. Fix that.
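
     An illustrative sketch of the off-by-one (simplified; not the
     kernel's atomics): a counter seeded with the first valid id but
     read with an add-then-return never hands out the seed value.

         /* sketch: OFFSET is the first id that cannot collide with an
          * old mount id; ctr mimics the kernel's atomic64 counter */
         #define OFFSET (1ULL << 32)
         static unsigned long long ctr = OFFSET;

         static unsigned long long next_id(void)
         {
                 return ++ctr;   /* first caller gets OFFSET + 1, so
                                  * OFFSET (4294967296) is never used */
         }

     The fs/namespace.c hunk below also lowers MNT_UNIQUE_ID_OFFSET to
     1ULL << 31: the old ids come from an IDA and are capped at
     INT_MAX, so 2^31 already cannot clash with them.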

   - Update a few comments on some vfs_*() creation helpers.

   - Move copying of the xattr name out from under the locks required
     to start a filesystem write.
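
     In outline (condensed from the fs/xattr.c hunks below; path lookup
     and retry elided), the copy from userspace now happens before
     write access is requested:

         error = setxattr_copy(name, &ctx);    /* copy, no locks held */
         if (error)
                 return error;

         error = mnt_want_write(path.mnt);     /* then start the write */
         if (!error) {
                 error = do_setxattr(mnt_idmap(path.mnt), path.dentry, &ctx);
                 mnt_drop_write(path.mnt);
         }
         kvfree(ctx.kvalue);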

   - Extend the filelock lock UAF fix to the compat code as well.

   - Now that we've added the ability to look up an inode under RCU,
     lockless hash lookup can find and lock an inode after it gets
     I_FREEING set. It then waits until inode teardown in evict() is
     finished.

     The flag, however, is still set after evict() has woken up all
     waiters. If the waiting side takes the inode lock late enough,
     after hash removal and the wakeup have happened, the waiting
     thread will never be woken.

     Before RCU-based lookup this was synchronized via the
     inode_hash_lock. But since unhashing requires the inode lock as
     well, we can check whether the inode is unhashed while holding the
     inode lock, even without holding inode_hash_lock.
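
     A userspace analogue of the lost wakeup and of the check that
     closes it (an assumption for illustration: pthreads stand in for
     i_lock and the wait bit; the real code is in the fs/inode.c hunks
     below):

         #include <pthread.h>
         #include <stdbool.h>

         static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
         static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
         static bool unhashed;   /* stands in for inode_unhashed() */

         /* lookup side: sleeping is only safe while still hashed,
          * i.e. while the wakeup is guaranteed to be ahead of us */
         static void *wait_side(void *unused)
         {
                 pthread_mutex_lock(&lock);
                 while (!unhashed)
                         pthread_cond_wait(&cond, &lock);
                 pthread_mutex_unlock(&lock);
                 return NULL;
         }

         int main(void)
         {
                 pthread_t waiter;

                 pthread_create(&waiter, NULL, wait_side, NULL);

                 /* evict() side: unhash, then wake under the lock */
                 pthread_mutex_lock(&lock);
                 unhashed = true;
                 pthread_cond_broadcast(&cond);
                 pthread_mutex_unlock(&lock);

                 pthread_join(waiter, NULL);
                 return 0;
         }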

  pidfd:

   - The nsproxy structure contains nearly all of the namespaces
     associated with a task. When a namespace type isn't supported,
     nsproxy might contain a NULL pointer or always point to the
     initial namespace. The logic isn't consistent. So when deriving
     namespace fds we need to ensure that the namespace type is
     supported.

     First, so that we don't risk dereferencing NULL pointers. The
     correct, bigger fix would be to change all namespaces to always
     set a valid namespace pointer in struct nsproxy independent of
     whether or not it is compiled in. But that requires quite a few
     changes.

     Second, so that we don't allow deriving namespace fds when the
     namespace type doesn't exist and thus couldn't be derived via
     /proc/self/ns/ either.
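
     The resulting pattern, excerpted from the fs/pidfs.c hunk further
     down (one case shown; every case is guarded the same way):

         struct ns_common *ns_common = NULL;

         switch (cmd) {
         case PIDFD_GET_IPC_NAMESPACE:
                 if (IS_ENABLED(CONFIG_IPC_NS)) {
                         get_ipc_ns(nsp->ipc_ns);
                         ns_common = to_ns_common(nsp->ipc_ns);
                 }
                 break;
         /* ... */
         }

         if (!ns_common)
                 return -EOPNOTSUPP;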

   - Add missing selftests for the new pidfd ioctls to derive namespace
     fds. This simply extends the existing test suite.
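
     For reference, a minimal sketch of calling one of the new ioctls
     from userspace (error handling trimmed; the constants match the
     selftest additions below):

         #include <sys/ioctl.h>
         #include <sys/syscall.h>
         #include <unistd.h>
         #include <linux/ioctl.h>

         #ifndef PIDFS_IOCTL_MAGIC
         #define PIDFS_IOCTL_MAGIC 0xFF
         #endif
         #ifndef PIDFD_GET_NET_NAMESPACE
         #define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
         #endif

         int main(void)
         {
                 int pidfd = syscall(SYS_pidfd_open, getpid(), 0);
                 /* returns a namespace fd, or fails with EOPNOTSUPP
                  * when the kernel lacks CONFIG_NET_NS */
                 int nsfd = ioctl(pidfd, PIDFD_GET_NET_NAMESPACE, 0);

                 if (nsfd >= 0)
                         close(nsfd);
                 if (pidfd >= 0)
                         close(pidfd);
                 return 0;
         }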

  netfs:

   - Fix debug logging and fix the Kconfig variable name so that it
     actually works.

   - Fix writeback that goes both to the server and the cache. The
     streams are only activated once a subreq is added. When a server
     write happens, the subreq doesn't need to have finished by the
     time the cache write is started. If the server write has already
     finished by the time the cache write is about to start, the cache
     write will operate on a folio that might already have been reused.
     Fix this by preactivating the cache write.
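
     The core of the fix is one added line in netfs_create_write_req()
     (see the fs/netfs/write_issue.c hunk below): the cache stream is
     marked active as soon as its resources are known to be valid,
     rather than when the first cache subrequest is added:

         if (fscache_resources_valid(&wreq->cache_resources)) {
                 wreq->io_streams[1].avail = true;
                 wreq->io_streams[1].active = true;   /* the added line */
                 wreq->io_streams[1].prepare_write =
                         wreq->cache_resources.ops->prepare_write_subreq;
                 wreq->io_streams[1].issue_write =
                         wreq->cache_resources.ops->issue_write;
         }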

   - Limit cachefiles subreq size for cache writes to MAX_RW_COUNT"

* tag 'vfs-6.11-rc1.fixes.2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  inode: clarify what's locked
  vfs: Fix potential circular locking through setxattr() and removexattr()
  filelock: Fix fcntl/close race recovery compat path
  fs: use all available ids
  cachefiles: Set the max subreq size for cache writes to MAX_RW_COUNT
  netfs: Fix writeback that needs to go to both server and cache
  pidfs: add selftests for new namespace ioctls
  pidfs: handle kernels without namespaces cleanly
  pidfs: when time ns disabled add check for ioctl
  vfs: correct the comments of vfs_*() helpers
  vfs: handle __wait_on_freeing_inode() and evict() race
  netfs: Rename CONFIG_FSCACHE_DEBUG to CONFIG_NETFS_DEBUG
  netfs: Revert "netfs: Switch debug logging to pr_debug()"
parents e44be002 f5e5e97c
......@@ -630,7 +630,7 @@ static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq)
_enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start);
subreq->max_len = ULONG_MAX;
subreq->max_len = MAX_RW_COUNT;
subreq->max_nr_segs = BIO_MAX_VECS;
if (!cachefiles_cres_file(cres)) {
......
......@@ -676,6 +676,16 @@ static void evict(struct inode *inode)
remove_inode_hash(inode);
/*
* Wake up waiters in __wait_on_freeing_inode().
*
* Lockless hash lookup may end up finding the inode before we removed
* it above, but only lock it *after* we are done with the wakeup below.
* In this case the potential waiter cannot safely block.
*
* The inode being unhashed after the call to remove_inode_hash() is
* used as an indicator whether blocking on it is safe.
*/
spin_lock(&inode->i_lock);
wake_up_bit(&inode->i_state, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
......@@ -888,18 +898,18 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
return freed;
}
static void __wait_on_freeing_inode(struct inode *inode, bool locked);
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked);
/*
* Called with the inode lock held.
*/
static struct inode *find_inode(struct super_block *sb,
struct hlist_head *head,
int (*test)(struct inode *, void *),
void *data, bool locked)
void *data, bool is_inode_hash_locked)
{
struct inode *inode = NULL;
if (locked)
if (is_inode_hash_locked)
lockdep_assert_held(&inode_hash_lock);
else
lockdep_assert_not_held(&inode_hash_lock);
......@@ -913,7 +923,7 @@ static struct inode *find_inode(struct super_block *sb,
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode, locked);
__wait_on_freeing_inode(inode, is_inode_hash_locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
......@@ -936,11 +946,11 @@ static struct inode *find_inode(struct super_block *sb,
*/
static struct inode *find_inode_fast(struct super_block *sb,
struct hlist_head *head, unsigned long ino,
bool locked)
bool is_inode_hash_locked)
{
struct inode *inode = NULL;
if (locked)
if (is_inode_hash_locked)
lockdep_assert_held(&inode_hash_lock);
else
lockdep_assert_not_held(&inode_hash_lock);
......@@ -954,7 +964,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
continue;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
__wait_on_freeing_inode(inode, locked);
__wait_on_freeing_inode(inode, is_inode_hash_locked);
goto repeat;
}
if (unlikely(inode->i_state & I_CREATING)) {
......@@ -2287,19 +2297,29 @@ EXPORT_SYMBOL(inode_needs_sync);
* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
* will DTRT.
*/
static void __wait_on_freeing_inode(struct inode *inode, bool locked)
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
/*
* Handle racing against evict(), see that routine for more details.
*/
if (unlikely(inode_unhashed(inode))) {
WARN_ON(is_inode_hash_locked);
spin_unlock(&inode->i_lock);
return;
}
wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
if (locked)
if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock);
schedule();
finish_wait(wq, &wait.wq_entry);
if (locked)
if (is_inode_hash_locked)
spin_lock(&inode_hash_lock);
rcu_read_lock();
}
......
......@@ -2570,8 +2570,9 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by releasing the
* lock that was just acquired. There is no need to do that when we're
* Detect close/fcntl races and recover by zapping all POSIX locks
* associated with this file and our files_struct, just like on
* filp_flush(). There is no need to do that when we're
* unlocking though, or for OFD locks.
*/
if (!error && file_lock->c.flc_type != F_UNLCK &&
......@@ -2586,9 +2587,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
f = files_lookup_fd_locked(files, fd);
spin_unlock(&files->file_lock);
if (f != filp) {
file_lock->c.flc_type = F_UNLCK;
error = do_lock_file_wait(filp, cmd, file_lock);
WARN_ON_ONCE(error);
locks_remove_posix(filp, files);
error = -EBADF;
}
}
......
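A userspace sketch of the hazard the recovery above handles (not a reliable reproducer, since the window is a kernel-internal race; /tmp/lockfile is just an example path):

        #include <fcntl.h>
        #include <pthread.h>
        #include <unistd.h>

        static int fd;

        /* close() must remove this task's POSIX locks on the file */
        static void *closer(void *unused)
        {
                close(fd);
                return NULL;
        }

        int main(void)
        {
                struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET };
                pthread_t t;

                fd = open("/tmp/lockfile", O_RDWR | O_CREAT, 0600);
                pthread_create(&t, NULL, closer, NULL);
                /* if close() wins the race, fcntl() must not leave a
                 * stray lock behind; the fix makes the compat path
                 * recover by zapping the locks, as filp_flush() does */
                fcntl(fd, F_SETLK, &fl);
                pthread_join(t, NULL);
                return 0;
        }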
......@@ -3248,9 +3248,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
/**
* vfs_create - create new file
* @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @mode: mode of the new file
* @dir: inode of the parent directory
* @dentry: dentry of the child file
* @mode: mode of the child file
* @want_excl: whether the file must not yet exist
*
* Create a new file.
......@@ -4047,9 +4047,9 @@ EXPORT_SYMBOL(user_path_create);
/**
* vfs_mknod - create device node or file
* @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @mode: mode of the new device node or file
* @dir: inode of the parent directory
* @dentry: dentry of the child device node
* @mode: mode of the child device node
* @dev: device number of device to create
*
* Create a device node or file.
......@@ -4174,9 +4174,9 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
/**
* vfs_mkdir - create directory
* @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @mode: mode of the new directory
* @dir: inode of the parent directory
* @dentry: dentry of the child directory
* @mode: mode of the child directory
*
* Create a directory.
*
......@@ -4256,8 +4256,8 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
/**
* vfs_rmdir - remove directory
* @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @dir: inode of the parent directory
* @dentry: dentry of the child directory
*
* Remove a directory.
*
......@@ -4537,8 +4537,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
/**
* vfs_symlink - create symlink
* @idmap: idmap of the mount the inode was found from
* @dir: inode of @dentry
* @dentry: pointer to dentry of the base directory
* @dir: inode of the parent directory
* @dentry: dentry of the child symlink file
* @oldname: name of the file to link to
*
* Create a symlink.
......
......@@ -70,7 +70,7 @@ static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
/* Don't allow confusion with old 32bit mount ID */
#define MNT_UNIQUE_ID_OFFSET (1ULL << 32)
#define MNT_UNIQUE_ID_OFFSET (1ULL << 31)
static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
static struct hlist_head *mount_hashtable __ro_after_init;
......
......@@ -22,6 +22,14 @@ config NETFS_STATS
between CPUs. On the other hand, the stats are very useful for
debugging purposes. Saying 'Y' here is recommended.
config NETFS_DEBUG
bool "Enable dynamic debugging netfslib and FS-Cache"
depends on NETFS
help
This permits debugging to be dynamically enabled in the local caching
management module. If this is set, the debugging output may be
enabled by setting bits in /sys/module/netfs/parameters/debug.
config FSCACHE
bool "General filesystem local caching manager"
depends on NETFS_SUPPORT
......@@ -50,13 +58,3 @@ config FSCACHE_STATS
debugging purposes. Saying 'Y' here is recommended.
See Documentation/filesystems/caching/fscache.rst for more information.
config FSCACHE_DEBUG
bool "Debug FS-Cache"
depends on FSCACHE
help
This permits debugging to be dynamically enabled in the local caching
management module. If this is set, the debugging output may be
enabled by setting bits in /sys/modules/fscache/parameter/debug.
See Documentation/filesystems/caching/fscache.rst for more information.
......@@ -117,7 +117,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
kdebug("no unlock");
_debug("no unlock");
else
folio_unlock(folio);
}
......@@ -204,7 +204,7 @@ void netfs_readahead(struct readahead_control *ractl)
struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
int ret;
kenter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
if (readahead_count(ractl) == 0)
return;
......@@ -268,7 +268,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
struct folio *sink = NULL;
int ret;
kenter("%lx", folio->index);
_enter("%lx", folio->index);
rreq = netfs_alloc_request(mapping, file,
folio_pos(folio), folio_size(folio),
......@@ -508,7 +508,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
have_folio:
*_folio = folio;
kleave(" = 0");
_leave(" = 0");
return 0;
error_put:
......@@ -518,7 +518,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
folio_unlock(folio);
folio_put(folio);
}
kleave(" = %d", ret);
_leave(" = %d", ret);
return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
......@@ -536,7 +536,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t flen = folio_size(folio);
int ret;
kenter("%zx @%llx", flen, start);
_enter("%zx @%llx", flen, start);
ret = -ENOMEM;
......@@ -567,7 +567,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
kleave(" = %d", ret);
_leave(" = %d", ret);
return ret;
}
......
......@@ -56,7 +56,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
struct netfs_group *group = netfs_folio_group(folio);
loff_t pos = folio_pos(folio);
kenter("");
_enter("");
if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;
......@@ -272,12 +272,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
*/
howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
flen, offset, part, maybe_trouble);
kdebug("howto %u", howto);
_debug("howto %u", howto);
switch (howto) {
case NETFS_JUST_PREFETCH:
ret = netfs_prefetch_for_write(file, folio, offset, part);
if (ret < 0) {
kdebug("prefetch = %zd", ret);
_debug("prefetch = %zd", ret);
goto error_folio_unlock;
}
break;
......@@ -418,7 +418,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
}
iocb->ki_pos += written;
kleave(" = %zd [%zd]", written, ret);
_leave(" = %zd [%zd]", written, ret);
return written ? written : ret;
error_folio_unlock:
......@@ -491,7 +491,7 @@ ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct netfs_inode *ictx = netfs_inode(inode);
ssize_t ret;
kenter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
......@@ -529,7 +529,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
vm_fault_t ret = VM_FAULT_RETRY;
int err;
kenter("%lx", folio->index);
_enter("%lx", folio->index);
sb_start_pagefault(inode->i_sb);
......
......@@ -33,7 +33,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
size_t orig_count = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
kenter("");
_enter("");
if (!orig_count)
return 0; /* Don't update atime */
......
......@@ -37,7 +37,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
size_t len = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
kenter("");
_enter("");
/* We're going to need a bounce buffer if what we transmit is going to
* be different in some way to the source buffer, e.g. because it gets
......@@ -45,7 +45,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
*/
// TODO
kdebug("uw %llx-%llx", start, end);
_debug("uw %llx-%llx", start, end);
wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
iocb->ki_flags & IOCB_DIRECT ?
......@@ -96,7 +96,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
kdebug("begin = %zd", ret);
_debug("begin = %zd", ret);
goto out;
}
......@@ -143,7 +143,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t pos = iocb->ki_pos;
unsigned long long end = pos + iov_iter_count(from) - 1;
kenter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
_enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
......
......@@ -237,7 +237,7 @@ int fscache_add_cache(struct fscache_cache *cache,
{
int n_accesses;
kenter("{%s,%s}", ops->name, cache->name);
_enter("{%s,%s}", ops->name, cache->name);
BUG_ON(fscache_cache_state(cache) != FSCACHE_CACHE_IS_PREPARING);
......@@ -257,7 +257,7 @@ int fscache_add_cache(struct fscache_cache *cache,
up_write(&fscache_addremove_sem);
pr_notice("Cache \"%s\" added (type %s)\n", cache->name, ops->name);
kleave(" = 0 [%s]", cache->name);
_leave(" = 0 [%s]", cache->name);
return 0;
}
EXPORT_SYMBOL(fscache_add_cache);
......
......@@ -456,7 +456,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
{
struct fscache_cookie *cookie;
kenter("V=%x", volume->debug_id);
_enter("V=%x", volume->debug_id);
if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
return NULL;
......@@ -484,7 +484,7 @@ struct fscache_cookie *__fscache_acquire_cookie(
trace_fscache_acquire(cookie);
fscache_stat(&fscache_n_acquires_ok);
kleave(" = c=%08x", cookie->debug_id);
_leave(" = c=%08x", cookie->debug_id);
return cookie;
}
EXPORT_SYMBOL(__fscache_acquire_cookie);
......@@ -505,7 +505,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed;
bool need_withdraw = false;
kenter("");
_enter("");
if (!cookie->volume->cache_priv) {
fscache_create_volume(cookie->volume, true);
......@@ -519,7 +519,7 @@ static void fscache_perform_lookup(struct fscache_cookie *cookie)
if (cookie->state != FSCACHE_COOKIE_STATE_FAILED)
fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT);
need_withdraw = true;
kleave(" [fail]");
_leave(" [fail]");
goto out;
}
......@@ -572,7 +572,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify)
bool queue = false;
int n_active;
kenter("c=%08x", cookie->debug_id);
_enter("c=%08x", cookie->debug_id);
if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
"Trying to use relinquished cookie\n"))
......@@ -636,7 +636,7 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify)
spin_unlock(&cookie->lock);
if (queue)
fscache_queue_cookie(cookie, fscache_cookie_get_use_work);
kleave("");
_leave("");
}
EXPORT_SYMBOL(__fscache_use_cookie);
......@@ -702,7 +702,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie)
enum fscache_cookie_state state;
bool wake = false;
kenter("c=%x", cookie->debug_id);
_enter("c=%x", cookie->debug_id);
again:
spin_lock(&cookie->lock);
......@@ -820,7 +820,7 @@ static void fscache_cookie_state_machine(struct fscache_cookie *cookie)
spin_unlock(&cookie->lock);
if (wake)
wake_up_cookie_state(cookie);
kleave("");
_leave("");
}
static void fscache_cookie_worker(struct work_struct *work)
......@@ -867,7 +867,7 @@ static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie)
set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags);
spin_unlock(&cookie->lock);
fscache_stat(&fscache_n_cookies_lru_expired);
kdebug("lru c=%x", cookie->debug_id);
_debug("lru c=%x", cookie->debug_id);
__fscache_withdraw_cookie(cookie);
}
......@@ -971,7 +971,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
if (retire)
fscache_stat(&fscache_n_relinquishes_retire);
kenter("c=%08x{%d},%d",
_enter("c=%08x{%d},%d",
cookie->debug_id, atomic_read(&cookie->n_active), retire);
if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags),
......@@ -1050,7 +1050,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
{
bool is_caching;
kenter("c=%x", cookie->debug_id);
_enter("c=%x", cookie->debug_id);
fscache_stat(&fscache_n_invalidates);
......@@ -1072,7 +1072,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */
default:
spin_unlock(&cookie->lock);
kleave(" [no %u]", cookie->state);
_leave(" [no %u]", cookie->state);
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
......@@ -1081,7 +1081,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
kleave(" [look %x]", cookie->inval_counter);
_leave(" [look %x]", cookie->inval_counter);
return;
case FSCACHE_COOKIE_STATE_ACTIVE:
......@@ -1094,7 +1094,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie,
if (is_caching)
fscache_queue_cookie(cookie, fscache_cookie_get_inval_work);
kleave(" [inv]");
_leave(" [inv]");
return;
}
}
......
......@@ -28,12 +28,12 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
again:
if (!fscache_cache_is_live(cookie->volume->cache)) {
kleave(" [broken]");
_leave(" [broken]");
return false;
}
state = fscache_cookie_state(cookie);
kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
_enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_CREATING:
......@@ -52,7 +52,7 @@ bool fscache_wait_for_operation(struct netfs_cache_resources *cres,
case FSCACHE_COOKIE_STATE_DROPPED:
case FSCACHE_COOKIE_STATE_RELINQUISHING:
default:
kleave(" [not live]");
_leave(" [not live]");
return false;
}
......@@ -92,7 +92,7 @@ static int fscache_begin_operation(struct netfs_cache_resources *cres,
spin_lock(&cookie->lock);
state = fscache_cookie_state(cookie);
kenter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
_enter("c=%08x{%u},%x", cookie->debug_id, state, want_state);
switch (state) {
case FSCACHE_COOKIE_STATE_LOOKING_UP:
......@@ -140,7 +140,7 @@ static int fscache_begin_operation(struct netfs_cache_resources *cres,
cres->cache_priv = NULL;
cres->ops = NULL;
fscache_end_cookie_access(cookie, fscache_access_io_not_live);
kleave(" = -ENOBUFS");
_leave(" = -ENOBUFS");
return -ENOBUFS;
}
......@@ -224,7 +224,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
if (len == 0)
goto abandon;
kenter("%llx,%zx", start, len);
_enter("%llx,%zx", start, len);
wreq = kzalloc(sizeof(struct fscache_write_request), GFP_NOFS);
if (!wreq)
......
......@@ -99,7 +99,7 @@ int __init fscache_init(void)
*/
void __exit fscache_exit(void)
{
kenter("");
_enter("");
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
......
......@@ -264,7 +264,7 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
fscache_see_volume(volume, fscache_volume_new_acquire);
fscache_stat(&fscache_n_volumes);
up_write(&fscache_addremove_sem);
kleave(" = v=%x", volume->debug_id);
_leave(" = v=%x", volume->debug_id);
return volume;
err_vol:
......@@ -466,7 +466,7 @@ void fscache_withdraw_volume(struct fscache_volume *volume)
{
int n_accesses;
kdebug("withdraw V=%x", volume->debug_id);
_debug("withdraw V=%x", volume->debug_id);
/* Allow wakeups on dec-to-0 */
n_accesses = atomic_dec_return(&volume->n_accesses);
......
......@@ -34,6 +34,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
/*
* main.c
*/
extern unsigned int netfs_debug;
extern struct list_head netfs_io_requests;
extern spinlock_t netfs_proc_lock;
extern mempool_t netfs_request_pool;
......@@ -353,12 +354,42 @@ void fscache_create_volume(struct fscache_volume *volume, bool wait);
* debug tracing
*/
#define dbgprintk(FMT, ...) \
pr_debug("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
#ifdef __KDEBUG
#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
#elif defined(CONFIG_NETFS_DEBUG)
#define _enter(FMT, ...) \
do { \
if (netfs_debug) \
kenter(FMT, ##__VA_ARGS__); \
} while (0)
#define _leave(FMT, ...) \
do { \
if (netfs_debug) \
kleave(FMT, ##__VA_ARGS__); \
} while (0)
#define _debug(FMT, ...) \
do { \
if (netfs_debug) \
kdebug(FMT, ##__VA_ARGS__); \
} while (0)
#else
#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
#endif
/*
* assertions
*/
......
......@@ -130,7 +130,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
if (count == remaining)
return;
kdebug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
_debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
rreq->debug_id, subreq->debug_index,
iov_iter_count(&subreq->io_iter), subreq->transferred,
subreq->len, rreq->i_size,
......@@ -326,7 +326,7 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
struct netfs_io_request *rreq = subreq->rreq;
int u;
kenter("R=%x[%x]{%llx,%lx},%zd",
_enter("R=%x[%x]{%llx,%lx},%zd",
rreq->debug_id, subreq->debug_index,
subreq->start, subreq->flags, transferred_or_error);
......@@ -435,7 +435,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
struct netfs_inode *ictx = netfs_inode(rreq->inode);
size_t lsize;
kenter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
if (rreq->origin != NETFS_DIO_READ) {
source = netfs_cache_prepare_read(subreq, rreq->i_size);
......@@ -518,7 +518,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
subreq->start = rreq->start + rreq->submitted;
subreq->len = io_iter->count;
kdebug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
_debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Call out to the cache to find out what it can do with the remaining
......@@ -570,7 +570,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
struct iov_iter io_iter;
int ret;
kenter("R=%x %llx-%llx",
_enter("R=%x %llx-%llx",
rreq->debug_id, rreq->start, rreq->start + rreq->len - 1);
if (rreq->len == 0) {
......@@ -593,7 +593,7 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
atomic_set(&rreq->nr_outstanding, 1);
io_iter = rreq->io_iter;
do {
kdebug("submit %llx + %llx >= %llx",
_debug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
if (rreq->origin == NETFS_DIO_READ &&
rreq->start + rreq->submitted >= rreq->i_size)
......
......@@ -20,6 +20,10 @@ MODULE_LICENSE("GPL");
EXPORT_TRACEPOINT_SYMBOL(netfs_sreq);
unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
static struct kmem_cache *netfs_request_slab;
static struct kmem_cache *netfs_subrequest_slab;
mempool_t netfs_request_pool;
......
......@@ -26,7 +26,7 @@ bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio)
struct fscache_cookie *cookie = netfs_i_cookie(ictx);
bool need_use = false;
kenter("");
_enter("");
if (!filemap_dirty_folio(mapping, folio))
return false;
......@@ -99,7 +99,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
struct netfs_folio *finfo;
size_t flen = folio_size(folio);
kenter("{%lx},%zx,%zx", folio->index, offset, length);
_enter("{%lx},%zx,%zx", folio->index, offset, length);
if (!folio_test_private(folio))
return;
......
......@@ -161,7 +161,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
{
struct list_head *next;
kenter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
if (list_empty(&stream->subrequests))
return;
......@@ -374,7 +374,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
unsigned int notes;
int s;
kenter("%llx-%llx", wreq->start, wreq->start + wreq->len);
_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
trace_netfs_collect(wreq);
trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
......@@ -409,7 +409,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
front = stream->front;
while (front) {
trace_netfs_collect_sreq(wreq, front);
//kdebug("sreq [%x] %llx %zx/%zx",
//_debug("sreq [%x] %llx %zx/%zx",
// front->debug_index, front->start, front->transferred, front->len);
/* Stall if there may be a discontinuity. */
......@@ -598,7 +598,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
out:
netfs_put_group_many(wreq->group, wreq->nr_group_rel);
wreq->nr_group_rel = 0;
kleave(" = %x", notes);
_leave(" = %x", notes);
return;
need_retry:
......@@ -606,7 +606,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
* that any partially completed op will have had any wholly transferred
* folios removed from it.
*/
kdebug("retry");
_debug("retry");
netfs_retry_writes(wreq);
goto out;
}
......@@ -621,7 +621,7 @@ void netfs_write_collection_worker(struct work_struct *work)
size_t transferred;
int s;
kenter("R=%x", wreq->debug_id);
_enter("R=%x", wreq->debug_id);
netfs_see_request(wreq, netfs_rreq_trace_see_work);
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
......@@ -684,7 +684,7 @@ void netfs_write_collection_worker(struct work_struct *work)
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_end(wreq->inode);
kdebug("finished");
_debug("finished");
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
......@@ -744,7 +744,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
struct netfs_io_request *wreq = subreq->rreq;
struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
kenter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
switch (subreq->source) {
case NETFS_UPLOAD_TO_SERVER:
......
......@@ -99,7 +99,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
if (IS_ERR(wreq))
return wreq;
kenter("R=%x", wreq->debug_id);
_enter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
......@@ -122,6 +122,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
wreq->io_streams[1].transferred = LONG_MAX;
if (fscache_resources_valid(&wreq->cache_resources)) {
wreq->io_streams[1].avail = true;
wreq->io_streams[1].active = true;
wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
}
......@@ -159,7 +160,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
subreq->max_nr_segs = INT_MAX;
subreq->stream_nr = stream->stream_nr;
kenter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
_enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
......@@ -215,7 +216,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
{
struct netfs_io_request *wreq = subreq->rreq;
kenter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
return netfs_write_subrequest_terminated(subreq, subreq->error, false);
......@@ -272,11 +273,11 @@ int netfs_advance_write(struct netfs_io_request *wreq,
size_t part;
if (!stream->avail) {
kleave("no write");
_leave("no write");
return len;
}
kenter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
_enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
if (subreq && start != subreq->start + subreq->len) {
netfs_issue_write(wreq, stream);
......@@ -288,7 +289,7 @@ int netfs_advance_write(struct netfs_io_request *wreq,
subreq = stream->construct;
part = min(subreq->max_len - subreq->len, len);
kdebug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
_debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
subreq->len += part;
subreq->nr_segs++;
......@@ -319,7 +320,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
bool to_eof = false, streamw = false;
bool debug = false;
kenter("");
_enter("");
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
......@@ -329,7 +330,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
if (fpos >= i_size) {
/* mmap beyond eof. */
kdebug("beyond eof");
_debug("beyond eof");
folio_start_writeback(folio);
folio_unlock(folio);
wreq->nr_group_rel += netfs_folio_written_back(folio);
......@@ -363,7 +364,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
flen -= foff;
kdebug("folio %zx %zx %zx", foff, flen, fsize);
_debug("folio %zx %zx %zx", foff, flen, fsize);
/* Deal with discontinuities in the stream of dirty pages. These can
* arise from a number of sources:
......@@ -487,7 +488,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
for (int s = 0; s < NR_IO_STREAMS; s++)
netfs_issue_write(wreq, &wreq->io_streams[s]);
kleave(" = 0");
_leave(" = 0");
return 0;
}
......@@ -522,7 +523,7 @@ int netfs_writepages(struct address_space *mapping,
netfs_stat(&netfs_n_wh_writepages);
do {
kdebug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
_debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
/* It appears we don't have to handle cyclic writeback wrapping. */
WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted);
......@@ -546,14 +547,14 @@ int netfs_writepages(struct address_space *mapping,
mutex_unlock(&ictx->wb_lock);
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
kleave(" = %d", error);
_leave(" = %d", error);
return error;
couldnt_start:
netfs_kill_dirty_pages(mapping, wbc, folio);
out:
mutex_unlock(&ictx->wb_lock);
kleave(" = %d", error);
_leave(" = %d", error);
return error;
}
EXPORT_SYMBOL(netfs_writepages);
......@@ -590,7 +591,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache)
{
kenter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
......@@ -624,7 +625,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
struct netfs_inode *ictx = netfs_inode(wreq->inode);
int ret;
kenter("R=%x", wreq->debug_id);
_enter("R=%x", wreq->debug_id);
if (writethrough_cache)
netfs_write_folio(wreq, wbc, writethrough_cache);
......@@ -657,7 +658,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
loff_t start = wreq->start;
int error = 0;
kenter("%zx", len);
_enter("%zx", len);
if (wreq->origin == NETFS_DIO_WRITE)
inode_dio_begin(wreq->inode);
......@@ -665,7 +666,7 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
while (len) {
// TODO: Prepare content encryption
kdebug("unbuffered %zx", len);
_debug("unbuffered %zx", len);
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
......@@ -684,6 +685,6 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
if (list_empty(&upload->subrequests))
netfs_wake_write_collector(wreq, false);
kleave(" = %d", error);
_leave(" = %d", error);
return error;
}
......@@ -119,7 +119,7 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct task_struct *task __free(put_task) = NULL;
struct nsproxy *nsp __free(put_nsproxy) = NULL;
struct pid *pid = pidfd_pid(file);
struct ns_common *ns_common;
struct ns_common *ns_common = NULL;
if (arg)
return -EINVAL;
......@@ -146,52 +146,73 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (cmd) {
/* Namespaces that hang off nsproxy. */
case PIDFD_GET_CGROUP_NAMESPACE:
get_cgroup_ns(nsp->cgroup_ns);
ns_common = to_ns_common(nsp->cgroup_ns);
if (IS_ENABLED(CONFIG_CGROUPS)) {
get_cgroup_ns(nsp->cgroup_ns);
ns_common = to_ns_common(nsp->cgroup_ns);
}
break;
case PIDFD_GET_IPC_NAMESPACE:
get_ipc_ns(nsp->ipc_ns);
ns_common = to_ns_common(nsp->ipc_ns);
if (IS_ENABLED(CONFIG_IPC_NS)) {
get_ipc_ns(nsp->ipc_ns);
ns_common = to_ns_common(nsp->ipc_ns);
}
break;
case PIDFD_GET_MNT_NAMESPACE:
get_mnt_ns(nsp->mnt_ns);
ns_common = to_ns_common(nsp->mnt_ns);
break;
case PIDFD_GET_NET_NAMESPACE:
ns_common = to_ns_common(nsp->net_ns);
get_net_ns(ns_common);
if (IS_ENABLED(CONFIG_NET_NS)) {
ns_common = to_ns_common(nsp->net_ns);
get_net_ns(ns_common);
}
break;
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
get_pid_ns(nsp->pid_ns_for_children);
ns_common = to_ns_common(nsp->pid_ns_for_children);
if (IS_ENABLED(CONFIG_PID_NS)) {
get_pid_ns(nsp->pid_ns_for_children);
ns_common = to_ns_common(nsp->pid_ns_for_children);
}
break;
case PIDFD_GET_TIME_NAMESPACE:
get_time_ns(nsp->time_ns);
ns_common = to_ns_common(nsp->time_ns);
if (IS_ENABLED(CONFIG_TIME_NS)) {
get_time_ns(nsp->time_ns);
ns_common = to_ns_common(nsp->time_ns);
}
break;
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
get_time_ns(nsp->time_ns_for_children);
ns_common = to_ns_common(nsp->time_ns_for_children);
if (IS_ENABLED(CONFIG_TIME_NS)) {
get_time_ns(nsp->time_ns_for_children);
ns_common = to_ns_common(nsp->time_ns_for_children);
}
break;
case PIDFD_GET_UTS_NAMESPACE:
get_uts_ns(nsp->uts_ns);
ns_common = to_ns_common(nsp->uts_ns);
if (IS_ENABLED(CONFIG_UTS_NS)) {
get_uts_ns(nsp->uts_ns);
ns_common = to_ns_common(nsp->uts_ns);
}
break;
/* Namespaces that don't hang off nsproxy. */
case PIDFD_GET_USER_NAMESPACE:
rcu_read_lock();
ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
rcu_read_unlock();
if (IS_ENABLED(CONFIG_USER_NS)) {
rcu_read_lock();
ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
rcu_read_unlock();
}
break;
case PIDFD_GET_PID_NAMESPACE:
rcu_read_lock();
ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
rcu_read_unlock();
if (IS_ENABLED(CONFIG_PID_NS)) {
rcu_read_lock();
ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
rcu_read_unlock();
}
break;
default:
return -ENOIOCTLCMD;
}
if (!ns_common)
return -EOPNOTSUPP;
/* open_namespace() unconditionally consumes the reference */
return open_namespace(ns_common);
}
......
......@@ -630,10 +630,9 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
ctx->kvalue, ctx->size, ctx->flags);
}
static long
setxattr(struct mnt_idmap *idmap, struct dentry *d,
const char __user *name, const void __user *value, size_t size,
int flags)
static int path_setxattr(const char __user *pathname,
const char __user *name, const void __user *value,
size_t size, int flags, unsigned int lookup_flags)
{
struct xattr_name kname;
struct xattr_ctx ctx = {
......@@ -643,33 +642,20 @@ setxattr(struct mnt_idmap *idmap, struct dentry *d,
.kname = &kname,
.flags = flags,
};
struct path path;
int error;
error = setxattr_copy(name, &ctx);
if (error)
return error;
error = do_setxattr(idmap, d, &ctx);
kvfree(ctx.kvalue);
return error;
}
static int path_setxattr(const char __user *pathname,
const char __user *name, const void __user *value,
size_t size, int flags, unsigned int lookup_flags)
{
struct path path;
int error;
retry:
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
if (error)
return error;
goto out;
error = mnt_want_write(path.mnt);
if (!error) {
error = setxattr(mnt_idmap(path.mnt), path.dentry, name,
value, size, flags);
error = do_setxattr(mnt_idmap(path.mnt), path.dentry, &ctx);
mnt_drop_write(path.mnt);
}
path_put(&path);
......@@ -677,6 +663,9 @@ static int path_setxattr(const char __user *pathname,
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
out:
kvfree(ctx.kvalue);
return error;
}
......@@ -697,20 +686,32 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
const void __user *,value, size_t, size, int, flags)
{
struct fd f = fdget(fd);
int error = -EBADF;
struct xattr_name kname;
struct xattr_ctx ctx = {
.cvalue = value,
.kvalue = NULL,
.size = size,
.kname = &kname,
.flags = flags,
};
int error;
CLASS(fd, f)(fd);
if (!f.file)
return error;
return -EBADF;
audit_file(f.file);
error = setxattr_copy(name, &ctx);
if (error)
return error;
error = mnt_want_write_file(f.file);
if (!error) {
error = setxattr(file_mnt_idmap(f.file),
f.file->f_path.dentry, name,
value, size, flags);
error = do_setxattr(file_mnt_idmap(f.file),
f.file->f_path.dentry, &ctx);
mnt_drop_write_file(f.file);
}
fdput(f);
kvfree(ctx.kvalue);
return error;
}
......@@ -899,9 +900,17 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
* Extended attribute REMOVE operations
*/
static long
removexattr(struct mnt_idmap *idmap, struct dentry *d,
const char __user *name)
removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name)
{
if (is_posix_acl_xattr(name))
return vfs_remove_acl(idmap, d, name);
return vfs_removexattr(idmap, d, name);
}
static int path_removexattr(const char __user *pathname,
const char __user *name, unsigned int lookup_flags)
{
struct path path;
int error;
char kname[XATTR_NAME_MAX + 1];
......@@ -910,25 +919,13 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d,
error = -ERANGE;
if (error < 0)
return error;
if (is_posix_acl_xattr(kname))
return vfs_remove_acl(idmap, d, kname);
return vfs_removexattr(idmap, d, kname);
}
static int path_removexattr(const char __user *pathname,
const char __user *name, unsigned int lookup_flags)
{
struct path path;
int error;
retry:
error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
if (error)
return error;
error = mnt_want_write(path.mnt);
if (!error) {
error = removexattr(mnt_idmap(path.mnt), path.dentry, name);
error = removexattr(mnt_idmap(path.mnt), path.dentry, kname);
mnt_drop_write(path.mnt);
}
path_put(&path);
......@@ -954,15 +951,23 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
{
struct fd f = fdget(fd);
char kname[XATTR_NAME_MAX + 1];
int error = -EBADF;
if (!f.file)
return error;
audit_file(f.file);
error = strncpy_from_user(kname, name, sizeof(kname));
if (error == 0 || error == sizeof(kname))
error = -ERANGE;
if (error < 0)
return error;
error = mnt_want_write_file(f.file);
if (!error) {
error = removexattr(file_mnt_idmap(f.file),
f.file->f_path.dentry, name);
f.file->f_path.dentry, kname);
mnt_drop_write_file(f.file);
}
fdput(f);
......
......@@ -16,11 +16,56 @@
#include <unistd.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <linux/ioctl.h>
#include "pidfd.h"
#include "../clone3/clone3_selftests.h"
#include "../kselftest_harness.h"
#ifndef PIDFS_IOCTL_MAGIC
#define PIDFS_IOCTL_MAGIC 0xFF
#endif
#ifndef PIDFD_GET_CGROUP_NAMESPACE
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
#endif
#ifndef PIDFD_GET_IPC_NAMESPACE
#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
#endif
#ifndef PIDFD_GET_MNT_NAMESPACE
#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
#endif
#ifndef PIDFD_GET_NET_NAMESPACE
#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
#endif
#ifndef PIDFD_GET_PID_NAMESPACE
#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
#endif
#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
#endif
#ifndef PIDFD_GET_TIME_NAMESPACE
#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
#endif
#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#endif
#ifndef PIDFD_GET_USER_NAMESPACE
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
#endif
#ifndef PIDFD_GET_UTS_NAMESPACE
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
#endif
enum {
PIDFD_NS_USER,
PIDFD_NS_MNT,
......@@ -31,22 +76,25 @@ enum {
PIDFD_NS_CGROUP,
PIDFD_NS_PIDCLD,
PIDFD_NS_TIME,
PIDFD_NS_TIMECLD,
PIDFD_NS_MAX
};
const struct ns_info {
const char *name;
int flag;
unsigned int pidfd_ioctl;
} ns_info[] = {
[PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
[PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
[PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
[PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
[PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
[PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
[PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
[PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
[PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, },
[PIDFD_NS_USER] = { "user", CLONE_NEWUSER, PIDFD_GET_USER_NAMESPACE, },
[PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, PIDFD_GET_MNT_NAMESPACE, },
[PIDFD_NS_PID] = { "pid", CLONE_NEWPID, PIDFD_GET_PID_NAMESPACE, },
[PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, PIDFD_GET_UTS_NAMESPACE, },
[PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, PIDFD_GET_IPC_NAMESPACE, },
[PIDFD_NS_NET] = { "net", CLONE_NEWNET, PIDFD_GET_NET_NAMESPACE, },
[PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, },
[PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, PIDFD_GET_TIME_NAMESPACE, },
[PIDFD_NS_PIDCLD] = { "pid_for_children", 0, PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE, },
[PIDFD_NS_TIMECLD] = { "time_for_children", 0, PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE, },
};
FIXTURE(current_nsset)
......@@ -54,6 +102,7 @@ FIXTURE(current_nsset)
pid_t pid;
int pidfd;
int nsfds[PIDFD_NS_MAX];
int child_pidfd_derived_nsfds[PIDFD_NS_MAX];
pid_t child_pid_exited;
int child_pidfd_exited;
......@@ -61,10 +110,12 @@ FIXTURE(current_nsset)
pid_t child_pid1;
int child_pidfd1;
int child_nsfds1[PIDFD_NS_MAX];
int child_pidfd_derived_nsfds1[PIDFD_NS_MAX];
pid_t child_pid2;
int child_pidfd2;
int child_nsfds2[PIDFD_NS_MAX];
int child_pidfd_derived_nsfds2[PIDFD_NS_MAX];
};
static int sys_waitid(int which, pid_t pid, int options)
......@@ -128,9 +179,12 @@ FIXTURE_SETUP(current_nsset)
char c;
for (i = 0; i < PIDFD_NS_MAX; i++) {
self->nsfds[i] = -EBADF;
self->child_nsfds1[i] = -EBADF;
self->child_nsfds2[i] = -EBADF;
self->nsfds[i] = -EBADF;
self->child_nsfds1[i] = -EBADF;
self->child_nsfds2[i] = -EBADF;
self->child_pidfd_derived_nsfds[i] = -EBADF;
self->child_pidfd_derived_nsfds1[i] = -EBADF;
self->child_pidfd_derived_nsfds2[i] = -EBADF;
}
proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
......@@ -139,6 +193,11 @@ FIXTURE_SETUP(current_nsset)
}
self->pid = getpid();
self->pidfd = sys_pidfd_open(self->pid, 0);
EXPECT_GT(self->pidfd, 0) {
TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
}
for (i = 0; i < PIDFD_NS_MAX; i++) {
const struct ns_info *info = &ns_info[i];
self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
......@@ -148,20 +207,27 @@ FIXTURE_SETUP(current_nsset)
info->name, self->pid);
}
}
}
self->pidfd = sys_pidfd_open(self->pid, 0);
EXPECT_GT(self->pidfd, 0) {
TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
self->child_pidfd_derived_nsfds[i] = ioctl(self->pidfd, info->pidfd_ioctl, 0);
if (self->child_pidfd_derived_nsfds[i] < 0) {
EXPECT_EQ(errno, EOPNOTSUPP) {
TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d",
info->name, self->pid);
}
}
}
/* Create task that exits right away. */
self->child_pid_exited = create_child(&self->child_pidfd_exited,
CLONE_NEWUSER | CLONE_NEWNET);
self->child_pid_exited = create_child(&self->child_pidfd_exited, 0);
EXPECT_GE(self->child_pid_exited, 0);
if (self->child_pid_exited == 0)
if (self->child_pid_exited == 0) {
if (self->nsfds[PIDFD_NS_USER] >= 0 && unshare(CLONE_NEWUSER) < 0)
_exit(EXIT_FAILURE);
if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0)
_exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);
}
ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
......@@ -174,18 +240,43 @@ FIXTURE_SETUP(current_nsset)
EXPECT_EQ(ret, 0);
/* Create tasks that will be stopped. */
self->child_pid1 = create_child(&self->child_pidfd1,
CLONE_NEWUSER | CLONE_NEWNS |
CLONE_NEWCGROUP | CLONE_NEWIPC |
CLONE_NEWUTS | CLONE_NEWPID |
CLONE_NEWNET);
if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0)
self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER | CLONE_NEWPID);
else if (self->nsfds[PIDFD_NS_PID] >= 0)
self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWPID);
else if (self->nsfds[PIDFD_NS_USER] >= 0)
self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER);
else
self->child_pid1 = create_child(&self->child_pidfd1, 0);
EXPECT_GE(self->child_pid1, 0);
if (self->child_pid1 == 0) {
close(ipc_sockets[0]);
if (!switch_timens())
if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) {
TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) {
TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) {
TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) {
TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) {
TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) {
TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (write_nointr(ipc_sockets[1], "1", 1) < 0)
_exit(EXIT_FAILURE);
......@@ -203,18 +294,43 @@ FIXTURE_SETUP(current_nsset)
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
EXPECT_EQ(ret, 0);
self->child_pid2 = create_child(&self->child_pidfd2,
CLONE_NEWUSER | CLONE_NEWNS |
CLONE_NEWCGROUP | CLONE_NEWIPC |
CLONE_NEWUTS | CLONE_NEWPID |
CLONE_NEWNET);
if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0)
self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID);
else if (self->nsfds[PIDFD_NS_PID] >= 0)
self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWPID);
else if (self->nsfds[PIDFD_NS_USER] >= 0)
self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER);
else
self->child_pid2 = create_child(&self->child_pidfd2, 0);
EXPECT_GE(self->child_pid2, 0);
if (self->child_pid2 == 0) {
close(ipc_sockets[0]);
if (!switch_timens())
if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) {
TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) {
TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) {
TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) {
TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) {
TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) {
TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
}
if (write_nointr(ipc_sockets[1], "1", 1) < 0)
_exit(EXIT_FAILURE);
......@@ -267,6 +383,22 @@ FIXTURE_SETUP(current_nsset)
info->name, self->child_pid1);
}
}
self->child_pidfd_derived_nsfds1[i] = ioctl(self->child_pidfd1, info->pidfd_ioctl, 0);
if (self->child_pidfd_derived_nsfds1[i] < 0) {
EXPECT_EQ(errno, EOPNOTSUPP) {
TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d",
info->name, self->child_pid1);
}
}
self->child_pidfd_derived_nsfds2[i] = ioctl(self->child_pidfd2, info->pidfd_ioctl, 0);
if (self->child_pidfd_derived_nsfds2[i] < 0) {
EXPECT_EQ(errno, EOPNOTSUPP) {
TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d",
info->name, self->child_pid2);
}
}
}
close(proc_fd);
......@@ -288,6 +420,12 @@ FIXTURE_TEARDOWN(current_nsset)
close(self->child_nsfds1[i]);
if (self->child_nsfds2[i] >= 0)
close(self->child_nsfds2[i]);
if (self->child_pidfd_derived_nsfds[i] >= 0)
close(self->child_pidfd_derived_nsfds[i]);
if (self->child_pidfd_derived_nsfds1[i] >= 0)
close(self->child_pidfd_derived_nsfds1[i]);
if (self->child_pidfd_derived_nsfds2[i] >= 0)
close(self->child_pidfd_derived_nsfds2[i]);
}
if (self->child_pidfd1 >= 0)
......@@ -446,6 +584,42 @@ TEST_F(current_nsset, nsfd_incremental_setns)
}
}
TEST_F(current_nsset, pidfd_derived_nsfd_incremental_setns)
{
int i;
pid_t pid;
pid = getpid();
for (i = 0; i < PIDFD_NS_MAX; i++) {
const struct ns_info *info = &ns_info[i];
int nsfd;
if (self->child_pidfd_derived_nsfds1[i] < 0)
continue;
if (info->flag) {
ASSERT_EQ(setns(self->child_pidfd_derived_nsfds1[i], info->flag), 0) {
TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
info->name, self->child_pid1,
self->child_pidfd_derived_nsfds1[i]);
}
}
/* Verify that we have changed to the correct namespaces. */
if (info->flag == CLONE_NEWPID)
nsfd = self->child_pidfd_derived_nsfds[i];
else
nsfd = self->child_pidfd_derived_nsfds1[i];
ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
info->name, self->child_pid1,
self->child_pidfd_derived_nsfds1[i]);
}
TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
info->name, self->child_pid1, self->child_pidfd_derived_nsfds1[i]);
}
}
TEST_F(current_nsset, pidfd_one_shot_setns)
{
unsigned flags = 0;
......@@ -542,6 +716,28 @@ TEST_F(current_nsset, no_foul_play)
info->name, self->child_pid2,
self->child_nsfds2[i]);
}
/*
* Can't setns to a user namespace outside of our hierarchy since we
* don't have caps in there and didn't create it. That means that under
* no circumstances should we be able to setns to any of the other
* ones since they aren't owned by our user namespace.
*/
for (i = 0; i < PIDFD_NS_MAX; i++) {
const struct ns_info *info = &ns_info[i];
if (self->child_pidfd_derived_nsfds2[i] < 0 || !info->flag)
continue;
ASSERT_NE(setns(self->child_pidfd_derived_nsfds2[i], info->flag), 0) {
TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
info->name, self->child_pid2,
self->child_pidfd_derived_nsfds2[i]);
}
TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
info->name, self->child_pid2,
self->child_pidfd_derived_nsfds2[i]);
}
}
TEST(setns_einval)
......