Commit b1b07ba3 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'xfs-5.18-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
 "The biggest change this cycle is bringing XFS' inode attribute setting
  code back towards alignment with what the VFS does. IOWs, setgid bit
  handling should be a closer match with ext4 and btrfs behavior.

  The rest of the branch is bug fixes around the filesystem -- patching
  gaps in quota enforcement, removing bogus selinux audit messages, and
  fixing log corruption and problems with log recovery. There will be a
  second pull request later on in the merge window with more bug fixes.

  Dave Chinner will be taking over as XFS maintainer for one release
  cycle, starting from the day 5.18-rc1 drops until 5.19-rc1 is tagged
  so that I can focus on starting a massive design review for the
  (feature complete after five years) online repair feature.

  Summary:

   - Fix some incorrect mapping state being passed to iomap during COW

   - Don't create bogus selinux audit messages when deciding to degrade
     gracefully due to lack of privilege

   - Fix setattr implementation to use VFS helpers so that we drop
     setgid consistently with the other filesystems

   - Fix link/unlink/rename to check quota limits

   - Constify xfs_name_dotdot to prevent abuse of in-kernel symbols

   - Fix log livelock between the AIL and inodegc threads during
     recovery

   - Fix a log stall when the AIL races with pushers

   - Fix stalls in CIL flushes due to pinned inode cluster buffers
     during recovery

   - Fix log corruption due to incorrect usage of xfs_is_shutdown vs
     xlog_is_shutdown because during an induced fs shutdown, AIL
     writeback must continue until the log is shut down, even if the
     filesystem has already shut down"

* tag 'xfs-5.18-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: xfs_is_shutdown vs xlog_is_shutdown cage fight
  xfs: AIL should be log centric
  xfs: log items should have a xlog pointer, not a mount
  xfs: async CIL flushes need pending pushes to be made stable
  xfs: xfs_ail_push_all_sync() stalls when racing with updates
  xfs: check buffer pin state after locking in delwri_submit
  xfs: log worker needs to start before intent/unlink recovery
  xfs: constify xfs_name_dotdot
  xfs: constify the name argument to various directory functions
  xfs: reserve quota for target dir expansion when renaming files
  xfs: reserve quota for dir expansion when linking/unlinking files
  xfs: refactor user/group quota chown in xfs_setattr_nonsize
  xfs: use setattr_copy to set vfs inode attributes
  xfs: don't generate selinux audit messages for capability testing
  xfs: add missing cmap->br_state = XFS_EXT_NORM update
parents f0614eef 01728b44
......@@ -19,7 +19,11 @@
#include "xfs_error.h"
#include "xfs_trace.h"
struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
const struct xfs_name xfs_name_dotdot = {
.name = (const unsigned char *)"..",
.len = 2,
.type = XFS_DIR3_FT_DIR,
};
/*
* Convert inode mode to directory entry filetype
......@@ -54,10 +58,10 @@ xfs_mode_to_ftype(
*/
xfs_dahash_t
xfs_ascii_ci_hashname(
struct xfs_name *name)
const struct xfs_name *name)
{
xfs_dahash_t hash;
int i;
xfs_dahash_t hash;
int i;
for (i = 0, hash = 0; i < name->len; i++)
hash = tolower(name->name[i]) ^ rol32(hash, 7);
......@@ -243,7 +247,7 @@ int
xfs_dir_createname(
struct xfs_trans *tp,
struct xfs_inode *dp,
struct xfs_name *name,
const struct xfs_name *name,
xfs_ino_t inum, /* new entry inode number */
xfs_extlen_t total) /* bmap's total block count */
{
......@@ -337,16 +341,16 @@ xfs_dir_cilookup_result(
int
xfs_dir_lookup(
xfs_trans_t *tp,
xfs_inode_t *dp,
struct xfs_name *name,
xfs_ino_t *inum, /* out: inode number */
struct xfs_name *ci_name) /* out: actual name if CI match */
struct xfs_trans *tp,
struct xfs_inode *dp,
const struct xfs_name *name,
xfs_ino_t *inum, /* out: inode number */
struct xfs_name *ci_name) /* out: actual name if CI match */
{
struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
int lock_mode;
struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
int lock_mode;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
......@@ -475,7 +479,7 @@ int
xfs_dir_replace(
struct xfs_trans *tp,
struct xfs_inode *dp,
struct xfs_name *name, /* name of entry to replace */
const struct xfs_name *name, /* name of entry to replace */
xfs_ino_t inum, /* new inode number */
xfs_extlen_t total) /* bmap's total block count */
{
......@@ -728,7 +732,7 @@ xfs_dir2_namecheck(
xfs_dahash_t
xfs_dir2_hashname(
struct xfs_mount *mp,
struct xfs_name *name)
const struct xfs_name *name)
{
if (unlikely(xfs_has_asciici(mp)))
return xfs_ascii_ci_hashname(name);
......
......@@ -21,7 +21,7 @@ struct xfs_dir2_data_unused;
struct xfs_dir3_icfree_hdr;
struct xfs_dir3_icleaf_hdr;
extern struct xfs_name xfs_name_dotdot;
extern const struct xfs_name xfs_name_dotdot;
/*
* Convert inode mode to directory entry filetype
......@@ -39,16 +39,16 @@ extern int xfs_dir_isempty(struct xfs_inode *dp);
extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_inode *pdp);
extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t inum,
const struct xfs_name *name, xfs_ino_t inum,
xfs_extlen_t tot);
extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t *inum,
const struct xfs_name *name, xfs_ino_t *inum,
struct xfs_name *ci_name);
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t ino,
xfs_extlen_t tot);
extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t inum,
const struct xfs_name *name, xfs_ino_t inum,
xfs_extlen_t tot);
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name);
......
......@@ -40,7 +40,7 @@ struct xfs_dir3_icfree_hdr {
};
/* xfs_dir2.c */
xfs_dahash_t xfs_ascii_ci_hashname(struct xfs_name *name);
xfs_dahash_t xfs_ascii_ci_hashname(const struct xfs_name *name);
enum xfs_dacmp xfs_ascii_ci_compname(struct xfs_da_args *args,
const unsigned char *name, int len);
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
......@@ -201,7 +201,8 @@ xfs_dir2_data_entsize(
return round_up(len, XFS_DIR2_DATA_ALIGN);
}
xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp,
const struct xfs_name *name);
enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
const unsigned char *name, int len);
......
......@@ -463,7 +463,7 @@ xfs_bui_item_recover(
struct xfs_bui_log_item *buip = BUI_ITEM(lip);
struct xfs_trans *tp;
struct xfs_inode *ip = NULL;
struct xfs_mount *mp = lip->li_mountp;
struct xfs_mount *mp = lip->li_log->l_mp;
struct xfs_map_extent *bmap;
struct xfs_bud_log_item *budp;
xfs_filblks_t count;
......
......@@ -14,6 +14,7 @@
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_recover.h"
#include "xfs_log_priv.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_errortag.h"
......@@ -813,7 +814,15 @@ xfs_buf_read_map(
* buffer.
*/
if (error) {
if (!xfs_is_shutdown(target->bt_mount))
/*
* Check against log shutdown for error reporting because
* metadata writeback may require a read first and we need to
* report errors in metadata writeback until the log is shut
* down. High level transaction read functions already check
* against mount shutdown, anyway, so we only need to be
* concerned about low level IO interactions here.
*/
if (!xlog_is_shutdown(target->bt_mount->m_log))
xfs_buf_ioerror_alert(bp, fa);
bp->b_flags &= ~XBF_DONE;
......@@ -1174,10 +1183,10 @@ xfs_buf_ioend_handle_error(
struct xfs_error_cfg *cfg;
/*
* If we've already decided to shutdown the filesystem because of I/O
* errors, there's no point in giving this a retry.
* If we've already shutdown the journal because of I/O errors, there's
* no point in giving this a retry.
*/
if (xfs_is_shutdown(mp))
if (xlog_is_shutdown(mp->m_log))
goto out_stale;
xfs_buf_ioerror_alert_ratelimited(bp);
......@@ -1588,8 +1597,23 @@ __xfs_buf_submit(
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
/* on shutdown we stale and complete the buffer immediately */
if (xfs_is_shutdown(bp->b_mount)) {
/*
* On log shutdown we stale and complete the buffer immediately. We can
* be called to read the superblock before the log has been set up, so
* be careful checking the log state.
*
* Checking the mount shutdown state here can result in the log tail
* moving inappropriately on disk as the log may not yet be shut down.
* i.e. failing this buffer on mount shutdown can remove it from the AIL
* and move the tail of the log forwards without having written this
* buffer to disk. This corrupts the log tail state in memory, and
* because the log may not be shut down yet, it can then be propagated
* to disk before the log is shutdown. Hence we check log shutdown
* state here rather than mount state to avoid corrupting the log tail
* on shutdown.
*/
if (bp->b_mount->m_log &&
xlog_is_shutdown(bp->b_mount->m_log)) {
xfs_buf_ioend_fail(bp);
return -EIO;
}
......@@ -1803,10 +1827,10 @@ xfs_buftarg_drain(
* If one or more failed buffers were freed, that means dirty metadata
* was thrown away. This should only ever happen after I/O completion
* handling has elevated I/O error(s) to permanent failures and shuts
* down the fs.
* down the journal.
*/
if (write_fail) {
ASSERT(xfs_is_shutdown(btp->bt_mount));
ASSERT(xlog_is_shutdown(btp->bt_mount->m_log));
xfs_alert(btp->bt_mount,
"Please run xfs_repair to determine the extent of the problem.");
}
......@@ -2089,12 +2113,13 @@ xfs_buf_delwri_submit_buffers(
blk_start_plug(&plug);
list_for_each_entry_safe(bp, n, buffer_list, b_list) {
if (!wait_list) {
if (!xfs_buf_trylock(bp))
continue;
if (xfs_buf_ispinned(bp)) {
xfs_buf_unlock(bp);
pinned++;
continue;
}
if (!xfs_buf_trylock(bp))
continue;
} else {
xfs_buf_lock(bp);
}
......
......@@ -21,6 +21,7 @@
#include "xfs_dquot.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
struct kmem_cache *xfs_buf_item_cache;
......@@ -428,7 +429,7 @@ xfs_buf_item_format(
* occurs during recovery.
*/
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
if (xfs_has_v3inodes(lip->li_mountp) ||
if (xfs_has_v3inodes(lip->li_log->l_mp) ||
!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
xfs_log_item_in_current_chkpt(lip)))
bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
......@@ -616,7 +617,7 @@ xfs_buf_item_put(
* that case, the bli is freed on buffer writeback completion.
*/
aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
xfs_is_shutdown(lip->li_mountp);
xlog_is_shutdown(lip->li_log);
dirty = bip->bli_flags & XFS_BLI_DIRTY;
if (dirty && !aborted)
return false;
......
......@@ -604,7 +604,7 @@ xfs_efi_item_recover(
struct list_head *capture_list)
{
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
struct xfs_mount *mp = lip->li_mountp;
struct xfs_mount *mp = lip->li_log->l_mp;
struct xfs_efd_log_item *efdp;
struct xfs_trans *tp;
struct xfs_extent *extp;
......
......@@ -864,8 +864,8 @@ xfs_getfsmap(
!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
return -EINVAL;
use_rmap = capable(CAP_SYS_ADMIN) &&
xfs_has_rmapbt(mp);
use_rmap = xfs_has_rmapbt(mp) &&
has_capability_noaudit(current, CAP_SYS_ADMIN);
head->fmh_entries = 0;
/* Set up our device handlers. */
......
......@@ -23,6 +23,7 @@
#include "xfs_reflink.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_log_priv.h"
#include <linux/iversion.h>
......@@ -873,7 +874,14 @@ xfs_reclaim_inode(
if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
goto out_iunlock;
if (xfs_is_shutdown(ip->i_mount)) {
/*
* Check for log shutdown because aborting the inode can move the log
* tail and corrupt in memory state. This is fine if the log is shut
* down, but if the log is still active and only the mount is shut down
* then the in-memory log tail movement caused by the abort can be
* incorrectly propagated to disk.
*/
if (xlog_is_shutdown(ip->i_mount->m_log)) {
xfs_iunpin_wait(ip);
xfs_iflush_abort(ip);
goto reclaim;
......
......@@ -35,6 +35,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
#include "xfs_log_priv.h"
struct kmem_cache *xfs_inode_cache;
......@@ -658,9 +659,9 @@ xfs_ip2xflags(
*/
int
xfs_lookup(
xfs_inode_t *dp,
struct xfs_name *name,
xfs_inode_t **ipp,
struct xfs_inode *dp,
const struct xfs_name *name,
struct xfs_inode **ipp,
struct xfs_name *ci_name)
{
xfs_ino_t inum;
......@@ -1217,7 +1218,7 @@ xfs_link(
{
xfs_mount_t *mp = tdp->i_mount;
xfs_trans_t *tp;
int error;
int error, nospace_error = 0;
int resblks;
trace_xfs_link(tdp, target_name);
......@@ -1236,19 +1237,11 @@ xfs_link(
goto std_return;
resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
}
error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks,
&tp, &nospace_error);
if (error)
goto std_return;
xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK,
XFS_IEXT_DIR_MANIP_CNT(mp));
if (error)
......@@ -1306,6 +1299,8 @@ xfs_link(
error_return:
xfs_trans_cancel(tp);
std_return:
if (error == -ENOSPC && nospace_error)
error = nospace_error;
return error;
}
......@@ -2755,6 +2750,7 @@ xfs_remove(
xfs_mount_t *mp = dp->i_mount;
xfs_trans_t *tp = NULL;
int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
int dontcare;
int error = 0;
uint resblks;
......@@ -2772,31 +2768,24 @@ xfs_remove(
goto std_return;
/*
* We try to get the real space reservation first,
* allowing for directory btree deletion(s) implying
* possible bmap insert(s). If we can't get the space
* reservation then we use 0 instead, and avoid the bmap
* btree insert(s) in the directory code by, if the bmap
* insert tries to happen, instead trimming the LAST
* block from the directory.
* We try to get the real space reservation first, allowing for
* directory btree deletion(s) implying possible bmap insert(s). If we
* can't get the space reservation then we use 0 instead, and avoid the
* bmap btree insert(s) in the directory code by, if the bmap insert
* tries to happen, instead trimming the LAST block from the directory.
*
* Ignore EDQUOT and ENOSPC being returned via nospace_error because
* the directory code can handle a reservationless update and we don't
* want to prevent a user from trying to free space by deleting things.
*/
resblks = XFS_REMOVE_SPACE_RES(mp);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
resblks = 0;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
&tp);
}
error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks,
&tp, &dontcare);
if (error) {
ASSERT(error != -ENOSPC);
goto std_return;
}
xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
* If we're removing a directory perform some additional validation.
*/
......@@ -3109,7 +3098,8 @@ xfs_rename(
bool new_parent = (src_dp != target_dp);
bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
int spaceres;
int error;
bool retried = false;
int error, nospace_error = 0;
trace_xfs_rename(src_dp, target_dp, src_name, target_name);
......@@ -3133,9 +3123,12 @@ xfs_rename(
xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
inodes, &num_inodes);
retry:
nospace_error = 0;
spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
if (error == -ENOSPC) {
nospace_error = error;
spaceres = 0;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
&tp);
......@@ -3189,6 +3182,31 @@ xfs_rename(
target_dp, target_name, target_ip,
spaceres);
/*
* Try to reserve quota to handle an expansion of the target directory.
* We'll allow the rename to continue in reservationless mode if we hit
* a space usage constraint. If we trigger reservationless mode, save
* the errno if there isn't any free space in the target directory.
*/
if (spaceres != 0) {
error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres,
0, false);
if (error == -EDQUOT || error == -ENOSPC) {
if (!retried) {
xfs_trans_cancel(tp);
xfs_blockgc_free_quota(target_dp, 0);
retried = true;
goto retry;
}
nospace_error = error;
spaceres = 0;
error = 0;
}
if (error)
goto out_trans_cancel;
}
/*
* Check for expected errors before we dirty the transaction
* so we can return an error without a transaction abort.
......@@ -3435,6 +3453,8 @@ xfs_rename(
out_release_wip:
if (wip)
xfs_irele(wip);
if (error == -ENOSPC && nospace_error)
error = nospace_error;
return error;
}
......@@ -3659,7 +3679,7 @@ xfs_iflush_cluster(
* AIL, leaving a dirty/unpinned inode attached to the buffer
* that otherwise looks like it should be flushed.
*/
if (xfs_is_shutdown(mp)) {
if (xlog_is_shutdown(mp->m_log)) {
xfs_iunpin_wait(ip);
xfs_iflush_abort(ip);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
......@@ -3685,9 +3705,19 @@ xfs_iflush_cluster(
}
if (error) {
/*
* Shutdown first so we kill the log before we release this
* buffer. If it is an INODE_ALLOC buffer and pins the tail
* of the log, failing it before the _log_ is shut down can
* result in the log tail being moved forward in the journal
* on disk because log writes can still be taking place. Hence
* unpinning the tail will allow the ICREATE intent to be
* removed from the log an recovery will fail with uninitialised
* inode cluster buffers.
*/
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioend_fail(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return error;
}
......
......@@ -402,7 +402,7 @@ enum layout_break_reason {
int xfs_release(struct xfs_inode *ip);
void xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct user_namespace *mnt_userns,
struct xfs_inode *dp, struct xfs_name *name,
......
......@@ -17,6 +17,7 @@
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_error.h"
#include <linux/iversion.h>
......@@ -720,6 +721,17 @@ xfs_iflush_ail_updates(
if (INODE_ITEM(lip)->ili_flush_lsn != lip->li_lsn)
continue;
/*
* dgc: Not sure how this happens, but it happens very
* occassionaly via generic/388. xfs_iflush_abort() also
* silently handles this same "under writeback but not in AIL at
* shutdown" condition via xfs_trans_ail_delete().
*/
if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
ASSERT(xlog_is_shutdown(lip->li_log));
continue;
}
lsn = xfs_ail_delete_one(ailp, lip);
if (!tail_lsn && lsn)
tail_lsn = lsn;
......
......@@ -1189,7 +1189,7 @@ xfs_ioctl_setattr_get_trans(
goto out_error;
error = xfs_trans_alloc_ichange(ip, NULL, NULL, pdqp,
capable(CAP_FOWNER), &tp);
has_capability_noaudit(current, CAP_FOWNER), &tp);
if (error)
goto out_error;
......
......@@ -613,37 +613,6 @@ xfs_vn_getattr(
return 0;
}
static void
xfs_setattr_mode(
struct xfs_inode *ip,
struct iattr *iattr)
{
struct inode *inode = VFS_I(ip);
umode_t mode = iattr->ia_mode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
inode->i_mode &= S_IFMT;
inode->i_mode |= mode & ~S_IFMT;
}
void
xfs_setattr_time(
struct xfs_inode *ip,
struct iattr *iattr)
{
struct inode *inode = VFS_I(ip);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (iattr->ia_valid & ATTR_ATIME)
inode->i_atime = iattr->ia_atime;
if (iattr->ia_valid & ATTR_CTIME)
inode->i_ctime = iattr->ia_ctime;
if (iattr->ia_valid & ATTR_MTIME)
inode->i_mtime = iattr->ia_mtime;
}
static int
xfs_vn_change_ok(
struct user_namespace *mnt_userns,
......@@ -678,10 +647,10 @@ xfs_setattr_nonsize(
int mask = iattr->ia_valid;
xfs_trans_t *tp;
int error;
kuid_t uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID;
kgid_t gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID;
kuid_t uid = GLOBAL_ROOT_UID;
kgid_t gid = GLOBAL_ROOT_GID;
struct xfs_dquot *udqp = NULL, *gdqp = NULL;
struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL;
struct xfs_dquot *old_udqp = NULL, *old_gdqp = NULL;
ASSERT((mask & ATTR_SIZE) == 0);
......@@ -723,66 +692,30 @@ xfs_setattr_nonsize(
}
error = xfs_trans_alloc_ichange(ip, udqp, gdqp, NULL,
capable(CAP_FOWNER), &tp);
has_capability_noaudit(current, CAP_FOWNER), &tp);
if (error)
goto out_dqrele;
/*
* Change file ownership. Must be the owner or privileged.
* Register quota modifications in the transaction. Must be the owner
* or privileged. These IDs could have changed since we last looked at
* them. But, we're assured that if the ownership did change while we
* didn't have the inode locked, inode's dquot(s) would have changed
* also.
*/
if (mask & (ATTR_UID|ATTR_GID)) {
/*
* These IDs could have changed since we last looked at them.
* But, we're assured that if the ownership did change
* while we didn't have the inode locked, inode's dquot(s)
* would have changed also.
*/
iuid = inode->i_uid;
igid = inode->i_gid;
gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
/*
* CAP_FSETID overrides the following restrictions:
*
* The set-user-ID and set-group-ID bits of a file will be
* cleared upon successful return from chown()
*/
if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
!capable(CAP_FSETID))
inode->i_mode &= ~(S_ISUID|S_ISGID);
/*
* Change the ownerships and register quota modifications
* in the transaction.
*/
if (!uid_eq(iuid, uid)) {
if (XFS_IS_UQUOTA_ON(mp)) {
ASSERT(mask & ATTR_UID);
ASSERT(udqp);
olddquot1 = xfs_qm_vop_chown(tp, ip,
&ip->i_udquot, udqp);
}
inode->i_uid = uid;
}
if (!gid_eq(igid, gid)) {
if (XFS_IS_GQUOTA_ON(mp)) {
ASSERT(xfs_has_pquotino(mp) ||
!XFS_IS_PQUOTA_ON(mp));
ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
olddquot2 = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
inode->i_gid = gid;
}
if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp) &&
!uid_eq(inode->i_uid, iattr->ia_uid)) {
ASSERT(udqp);
old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
}
if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp) &&
!gid_eq(inode->i_gid, iattr->ia_gid)) {
ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp));
ASSERT(gdqp);
old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp);
}
if (mask & ATTR_MODE)
xfs_setattr_mode(ip, iattr);
if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
xfs_setattr_time(ip, iattr);
setattr_copy(mnt_userns, inode, iattr);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
XFS_STATS_INC(mp, xs_ig_attrchg);
......@@ -794,8 +727,8 @@ xfs_setattr_nonsize(
/*
* Release any dquot(s) the inode had kept before chown.
*/
xfs_qm_dqrele(olddquot1);
xfs_qm_dqrele(olddquot2);
xfs_qm_dqrele(old_udqp);
xfs_qm_dqrele(old_gdqp);
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
......@@ -1006,11 +939,8 @@ xfs_setattr_size(
xfs_inode_clear_eofblocks_tag(ip);
}
if (iattr->ia_valid & ATTR_MODE)
xfs_setattr_mode(ip, iattr);
if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
xfs_setattr_time(ip, iattr);
ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
setattr_copy(mnt_userns, inode, iattr);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
XFS_STATS_INC(mp, xs_ig_attrchg);
......
......@@ -812,10 +812,9 @@ xfs_log_mount_finish(
* mount failure occurs.
*/
mp->m_super->s_flags |= SB_ACTIVE;
xfs_log_work_queue(mp);
if (xlog_recovery_needed(log))
error = xlog_recover_finish(log);
if (!error)
xfs_log_work_queue(mp);
mp->m_super->s_flags &= ~SB_ACTIVE;
evict_inodes(mp->m_super);
......@@ -1102,7 +1101,7 @@ xfs_log_item_init(
int type,
const struct xfs_item_ops *ops)
{
item->li_mountp = mp;
item->li_log = mp->m_log;
item->li_ailp = mp->m_ail;
item->li_type = type;
item->li_ops = ops;
......
......@@ -1243,18 +1243,27 @@ xlog_cil_push_now(
if (!async)
flush_workqueue(cil->xc_push_wq);
spin_lock(&cil->xc_push_lock);
/*
* If this is an async flush request, we always need to set the
* xc_push_commit_stable flag even if something else has already queued
* a push. The flush caller is asking for the CIL to be on stable
* storage when the next push completes, so regardless of who has queued
* the push, the flush requires stable semantics from it.
*/
cil->xc_push_commit_stable = async;
/*
* If the CIL is empty or we've already pushed the sequence then
* there's no work we need to do.
* there's no more work that we need to do.
*/
spin_lock(&cil->xc_push_lock);
if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
spin_unlock(&cil->xc_push_lock);
return;
}
cil->xc_push_seq = push_seq;
cil->xc_push_commit_stable = async;
queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work);
spin_unlock(&cil->xc_push_lock);
}
......@@ -1352,6 +1361,13 @@ xlog_cil_flush(
trace_xfs_log_force(log->l_mp, seq, _RET_IP_);
xlog_cil_push_now(log, seq, true);
/*
* If the CIL is empty, make sure that any previous checkpoint that may
* still be in an active iclog is pushed to stable storage.
*/
if (list_empty(&log->l_cilp->xc_cil))
xfs_log_force(log->l_mp, 0);
}
/*
......@@ -1468,7 +1484,7 @@ bool
xfs_log_item_in_current_chkpt(
struct xfs_log_item *lip)
{
struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
struct xfs_cil *cil = lip->li_log->l_cilp;
if (list_empty(&lip->li_cil))
return false;
......
......@@ -319,7 +319,8 @@ xfs_fs_commit_blocks(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_setattr_time(ip, iattr);
ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
setattr_copy(&init_user_ns, inode, iattr);
if (update_isize) {
i_size_write(inode, iattr->ia_size);
ip->i_disk_size = iattr->ia_size;
......
......@@ -25,6 +25,7 @@
#include "xfs_error.h"
#include "xfs_ag.h"
#include "xfs_ialloc.h"
#include "xfs_log_priv.h"
/*
* The global quota manager. There is only one of these for the entire
......@@ -121,8 +122,7 @@ xfs_qm_dqpurge(
struct xfs_dquot *dqp,
void *data)
{
struct xfs_mount *mp = dqp->q_mount;
struct xfs_quotainfo *qi = mp->m_quotainfo;
struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo;
int error = -EAGAIN;
xfs_dqlock(dqp);
......@@ -157,7 +157,7 @@ xfs_qm_dqpurge(
}
ASSERT(atomic_read(&dqp->q_pincount) == 0);
ASSERT(xfs_is_shutdown(mp) ||
ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) ||
!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
xfs_dqfunlock(dqp);
......@@ -172,7 +172,7 @@ xfs_qm_dqpurge(
*/
ASSERT(!list_empty(&dqp->q_lru));
list_lru_del(&qi->qi_lru, &dqp->q_lru);
XFS_STATS_DEC(mp, xs_qm_dquot_unused);
XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
xfs_qm_dqdestroy(dqp);
return 0;
......
......@@ -457,7 +457,7 @@ xfs_cui_item_recover(
struct xfs_cud_log_item *cudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
struct xfs_mount *mp = lip->li_mountp;
struct xfs_mount *mp = lip->li_log->l_mp;
xfs_fsblock_t new_fsb;
xfs_extlen_t new_len;
unsigned int refc_type;
......
......@@ -425,7 +425,10 @@ xfs_reflink_allocate_cow(
if (!convert_now || cmap->br_state == XFS_EXT_NORM)
return 0;
trace_xfs_reflink_convert_cow(ip, cmap);
return xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
if (!error)
cmap->br_state = XFS_EXT_NORM;
return error;
out_trans_cancel:
xfs_trans_cancel(tp);
......
......@@ -510,7 +510,7 @@ xfs_rui_item_recover(
struct xfs_rud_log_item *rudp;
struct xfs_trans *tp;
struct xfs_btree_cur *rcur = NULL;
struct xfs_mount *mp = lip->li_mountp;
struct xfs_mount *mp = lip->li_log->l_mp;
enum xfs_rmap_intent_type type;
xfs_exntst_t state;
int i;
......
......@@ -933,7 +933,7 @@ DEFINE_IREF_EVENT(xfs_inode_unpin);
DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
DECLARE_EVENT_CLASS(xfs_namespace_class,
TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name),
TP_ARGS(dp, name),
TP_STRUCT__entry(
__field(dev_t, dev)
......@@ -956,7 +956,7 @@ DECLARE_EVENT_CLASS(xfs_namespace_class,
#define DEFINE_NAMESPACE_EVENT(name) \
DEFINE_EVENT(xfs_namespace_class, name, \
TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), \
TP_ARGS(dp, name))
DEFINE_NAMESPACE_EVENT(xfs_remove);
DEFINE_NAMESPACE_EVENT(xfs_link);
......@@ -1308,7 +1308,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
__field(xfs_lsn_t, lsn)
),
TP_fast_assign(
__entry->dev = lip->li_mountp->m_super->s_dev;
__entry->dev = lip->li_log->l_mp->m_super->s_dev;
__entry->lip = lip;
__entry->type = lip->li_type;
__entry->flags = lip->li_flags;
......@@ -1361,7 +1361,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class,
__field(xfs_lsn_t, new_lsn)
),
TP_fast_assign(
__entry->dev = lip->li_mountp->m_super->s_dev;
__entry->dev = lip->li_log->l_mp->m_super->s_dev;
__entry->lip = lip;
__entry->type = lip->li_type;
__entry->flags = lip->li_flags;
......
......@@ -648,7 +648,7 @@ xfs_trans_add_item(
struct xfs_trans *tp,
struct xfs_log_item *lip)
{
ASSERT(lip->li_mountp == tp->t_mountp);
ASSERT(lip->li_log == tp->t_mountp->m_log);
ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
ASSERT(list_empty(&lip->li_trans));
ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));
......@@ -775,7 +775,7 @@ xfs_trans_committed_bulk(
* object into the AIL as we are in a shutdown situation.
*/
if (aborted) {
ASSERT(xfs_is_shutdown(ailp->ail_mount));
ASSERT(xlog_is_shutdown(ailp->ail_log));
if (lip->li_ops->iop_unpin)
lip->li_ops->iop_unpin(lip, 1);
continue;
......@@ -1210,3 +1210,89 @@ xfs_trans_alloc_ichange(
xfs_trans_cancel(tp);
return error;
}
/*
* Allocate an transaction, lock and join the directory and child inodes to it,
* and reserve quota for a directory update. If there isn't sufficient space,
* @dblocks will be set to zero for a reservationless directory update and
* @nospace_error will be set to a negative errno describing the space
* constraint we hit.
*
* The caller must ensure that the on-disk dquots attached to this inode have
* already been allocated and initialized. The ILOCKs will be dropped when the
* transaction is committed or cancelled.
*/
int
xfs_trans_alloc_dir(
struct xfs_inode *dp,
struct xfs_trans_res *resv,
struct xfs_inode *ip,
unsigned int *dblocks,
struct xfs_trans **tpp,
int *nospace_error)
{
struct xfs_trans *tp;
struct xfs_mount *mp = ip->i_mount;
unsigned int resblks;
bool retried = false;
int error;
retry:
*nospace_error = 0;
resblks = *dblocks;
error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
*nospace_error = error;
resblks = 0;
error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
}
if (error)
return error;
xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
error = xfs_qm_dqattach_locked(dp, false);
if (error) {
/* Caller should have allocated the dquots! */
ASSERT(error != -ENOENT);
goto out_cancel;
}
error = xfs_qm_dqattach_locked(ip, false);
if (error) {
/* Caller should have allocated the dquots! */
ASSERT(error != -ENOENT);
goto out_cancel;
}
if (resblks == 0)
goto done;
error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
if (error == -EDQUOT || error == -ENOSPC) {
if (!retried) {
xfs_trans_cancel(tp);
xfs_blockgc_free_quota(dp, 0);
retried = true;
goto retry;
}
*nospace_error = error;
resblks = 0;
error = 0;
}
if (error)
goto out_cancel;
done:
*tpp = tp;
*dblocks = resblks;
return 0;
out_cancel:
xfs_trans_cancel(tp);
return error;
}
......@@ -8,6 +8,7 @@
/* kernel only transaction subsystem defines */
struct xlog;
struct xfs_buf;
struct xfs_buftarg;
struct xfs_efd_log_item;
......@@ -31,7 +32,7 @@ struct xfs_log_item {
struct list_head li_ail; /* AIL pointers */
struct list_head li_trans; /* transaction list */
xfs_lsn_t li_lsn; /* last on-disk lsn */
struct xfs_mount *li_mountp; /* ptr to fs mount */
struct xlog *li_log;
struct xfs_ail *li_ailp; /* ptr to AIL */
uint li_type; /* item type */
unsigned long li_flags; /* misc flags */
......@@ -259,6 +260,9 @@ int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv,
int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
struct xfs_trans **tpp);
int xfs_trans_alloc_dir(struct xfs_inode *dp, struct xfs_trans_res *resv,
struct xfs_inode *ip, unsigned int *dblocks,
struct xfs_trans **tpp, int *nospace_error);
static inline void
xfs_trans_set_context(
......
......@@ -398,7 +398,7 @@ xfsaild_push_item(
* If log item pinning is enabled, skip the push and track the item as
* pinned. This can help induce head-behind-tail conditions.
*/
if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
return XFS_ITEM_PINNED;
/*
......@@ -418,7 +418,7 @@ static long
xfsaild_push(
struct xfs_ail *ailp)
{
xfs_mount_t *mp = ailp->ail_mount;
struct xfs_mount *mp = ailp->ail_log->l_mp;
struct xfs_ail_cursor cur;
struct xfs_log_item *lip;
xfs_lsn_t lsn;
......@@ -443,15 +443,27 @@ xfsaild_push(
ailp->ail_log_flush = 0;
XFS_STATS_INC(mp, xs_push_ail_flush);
xlog_cil_flush(mp->m_log);
xlog_cil_flush(ailp->ail_log);
}
spin_lock(&ailp->ail_lock);
/* barrier matches the ail_target update in xfs_ail_push() */
smp_rmb();
target = ailp->ail_target;
ailp->ail_target_prev = target;
/*
* If we have a sync push waiter, we always have to push till the AIL is
* empty. Update the target to point to the end of the AIL so that
* capture updates that occur after the sync push waiter has gone to
* sleep.
*/
if (waitqueue_active(&ailp->ail_empty)) {
lip = xfs_ail_max(ailp);
if (lip)
target = lip->li_lsn;
} else {
/* barrier matches the ail_target update in xfs_ail_push() */
smp_rmb();
target = ailp->ail_target;
ailp->ail_target_prev = target;
}
/* we're done if the AIL is empty or our push has reached the end */
lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
......@@ -620,7 +632,7 @@ xfsaild(
* opportunity to release such buffers from the queue.
*/
ASSERT(list_empty(&ailp->ail_buf_list) ||
xfs_is_shutdown(ailp->ail_mount));
xlog_is_shutdown(ailp->ail_log));
xfs_buf_delwri_cancel(&ailp->ail_buf_list);
break;
}
......@@ -683,7 +695,7 @@ xfs_ail_push(
struct xfs_log_item *lip;
lip = xfs_ail_min(ailp);
if (!lip || xfs_is_shutdown(ailp->ail_mount) ||
if (!lip || xlog_is_shutdown(ailp->ail_log) ||
XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
return;
......@@ -724,7 +736,6 @@ xfs_ail_push_all_sync(
spin_lock(&ailp->ail_lock);
while ((lip = xfs_ail_max(ailp)) != NULL) {
prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
ailp->ail_target = lip->li_lsn;
wake_up_process(ailp->ail_task);
spin_unlock(&ailp->ail_lock);
schedule();
......@@ -740,7 +751,7 @@ xfs_ail_update_finish(
struct xfs_ail *ailp,
xfs_lsn_t old_lsn) __releases(ailp->ail_lock)
{
struct xfs_mount *mp = ailp->ail_mount;
struct xlog *log = ailp->ail_log;
/* if the tail lsn hasn't changed, don't do updates or wakeups. */
if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
......@@ -748,13 +759,13 @@ xfs_ail_update_finish(
return;
}
if (!xfs_is_shutdown(mp))
xlog_assign_tail_lsn_locked(mp);
if (!xlog_is_shutdown(log))
xlog_assign_tail_lsn_locked(log->l_mp);
if (list_empty(&ailp->ail_head))
wake_up_all(&ailp->ail_empty);
spin_unlock(&ailp->ail_lock);
xfs_log_space_wake(mp);
xfs_log_space_wake(log->l_mp);
}
/*
......@@ -862,13 +873,13 @@ xfs_trans_ail_delete(
int shutdown_type)
{
struct xfs_ail *ailp = lip->li_ailp;
struct xfs_mount *mp = ailp->ail_mount;
struct xfs_mount *mp = ailp->ail_log->l_mp;
xfs_lsn_t tail_lsn;
spin_lock(&ailp->ail_lock);
if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
spin_unlock(&ailp->ail_lock);
if (shutdown_type && !xfs_is_shutdown(mp)) {
if (shutdown_type && !xlog_is_shutdown(ailp->ail_log)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
"%s: attempting to delete a log item that is not in the AIL",
__func__);
......@@ -893,7 +904,7 @@ xfs_trans_ail_init(
if (!ailp)
return -ENOMEM;
ailp->ail_mount = mp;
ailp->ail_log = mp->m_log;
INIT_LIST_HEAD(&ailp->ail_head);
INIT_LIST_HEAD(&ailp->ail_cursors);
spin_lock_init(&ailp->ail_lock);
......@@ -901,7 +912,7 @@ xfs_trans_ail_init(
init_waitqueue_head(&ailp->ail_empty);
ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
ailp->ail_mount->m_super->s_id);
mp->m_super->s_id);
if (IS_ERR(ailp->ail_task))
goto out_free_ailp;
......
......@@ -6,6 +6,7 @@
#ifndef __XFS_TRANS_PRIV_H__
#define __XFS_TRANS_PRIV_H__
struct xlog;
struct xfs_log_item;
struct xfs_mount;
struct xfs_trans;
......@@ -50,7 +51,7 @@ struct xfs_ail_cursor {
* Eventually we need to drive the locking in here as well.
*/
struct xfs_ail {
struct xfs_mount *ail_mount;
struct xlog *ail_log;
struct task_struct *ail_task;
struct list_head ail_head;
xfs_lsn_t ail_target;
......
......@@ -360,6 +360,7 @@ bool has_capability_noaudit(struct task_struct *t, int cap)
{
return has_ns_capability_noaudit(t, &init_user_ns, cap);
}
EXPORT_SYMBOL(has_capability_noaudit);
static bool ns_capable_common(struct user_namespace *ns,
int cap,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment