Commit 5385f1a6 authored by Darrick J. Wong's avatar Darrick J. Wong

xfs: repair file modes by scanning for a dirent pointing to us

Repair might encounter an inode with a totally garbage i_mode.  To fix
this problem, we have to figure out if the file was a regular file, a
directory, or a special file.  One way to figure this out is to check if
there are any directories with entries pointing down to the busted file.

This patch recovers the file mode by scanning every directory entry on
the filesystem to see if there are any that point to the busted file.
If the ftype of all such dirents are consistent, the mode is recovered
from the ftype.  If no dirents are found, the file becomes a regular
file.  In all cases, ACLs are canceled and the file is made accessible
only by root.

A previous patch attempted to guess the mode by reading the beginning of
the file data.  This was rejected by Christoph on the grounds that we
cannot trust user-controlled data blocks.  Users do not have direct
control over the ondisk contents of directory entries, so this method
should be much safer.

If all the dirents have the same ftype, then we can translate that back
into an S_IFMT flag and fix the file.  If not, reset the mode to
S_IFREG.
Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent 3c79e6a8
...@@ -43,6 +43,8 @@ ...@@ -43,6 +43,8 @@
#include "scrub/btree.h" #include "scrub/btree.h"
#include "scrub/trace.h" #include "scrub/trace.h"
#include "scrub/repair.h" #include "scrub/repair.h"
#include "scrub/iscan.h"
#include "scrub/readdir.h"
/* /*
* Inode Record Repair * Inode Record Repair
...@@ -126,6 +128,10 @@ struct xrep_inode { ...@@ -126,6 +128,10 @@ struct xrep_inode {
/* Must we remove all access from this file? */ /* Must we remove all access from this file? */
bool zap_acls; bool zap_acls;
/* Inode scanner to see if we can find the ftype from dirents */
struct xchk_iscan ftype_iscan;
uint8_t alleged_ftype;
}; };
/* /*
...@@ -227,26 +233,233 @@ xrep_dinode_header( ...@@ -227,26 +233,233 @@ xrep_dinode_header(
dip->di_gen = cpu_to_be32(sc->sm->sm_gen); dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
} }
/* Turn di_mode into /something/ recognizable. */ /*
STATIC void * If this directory entry points to the scrub target inode, then the directory
* we're scanning is the parent of the scrub target inode.
*/
STATIC int
xrep_dinode_findmode_dirent(
struct xfs_scrub *sc,
struct xfs_inode *dp,
xfs_dir2_dataptr_t dapos,
const struct xfs_name *name,
xfs_ino_t ino,
void *priv)
{
struct xrep_inode *ri = priv;
int error = 0;
if (xchk_should_terminate(ri->sc, &error))
return error;
if (ino != sc->sm->sm_ino)
return 0;
/* Ignore garbage directory entry names. */
if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
return -EFSCORRUPTED;
/* Don't pick up dot or dotdot entries; we only want child dirents. */
if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
xfs_dir2_samename(name, &xfs_name_dot))
return 0;
/*
* Uhoh, more than one parent for this inode and they don't agree on
* the file type?
*/
if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
ri->alleged_ftype != name->type) {
trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
ri->alleged_ftype);
return -EFSCORRUPTED;
}
/* We found a potential parent; remember the ftype. */
trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
ri->alleged_ftype = name->type;
return 0;
}
/*
* If this is a directory, walk the dirents looking for any that point to the
* scrub target inode.
*/
STATIC int
xrep_dinode_findmode_walk_directory(
struct xrep_inode *ri,
struct xfs_inode *dp)
{
struct xfs_scrub *sc = ri->sc;
unsigned int lock_mode;
int error = 0;
/*
* Scan the directory to see if there it contains an entry pointing to
* the directory that we are repairing.
*/
lock_mode = xfs_ilock_data_map_shared(dp);
/*
* If this directory is known to be sick, we cannot scan it reliably
* and must abort.
*/
if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
XFS_SICK_INO_BMBTD |
XFS_SICK_INO_DIR)) {
error = -EFSCORRUPTED;
goto out_unlock;
}
/*
* We cannot complete our parent pointer scan if a directory looks as
* though it has been zapped by the inode record repair code.
*/
if (xchk_dir_looks_zapped(dp)) {
error = -EBUSY;
goto out_unlock;
}
error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
if (error)
goto out_unlock;
out_unlock:
xfs_iunlock(dp, lock_mode);
return error;
}
/*
* Try to find the mode of the inode being repaired by looking for directories
* that point down to this file.
*/
STATIC int
xrep_dinode_find_mode(
struct xrep_inode *ri,
uint16_t *mode)
{
struct xfs_scrub *sc = ri->sc;
struct xfs_inode *dp;
int error;
/* No ftype means we have no other metadata to consult. */
if (!xfs_has_ftype(sc->mp)) {
*mode = S_IFREG;
return 0;
}
/*
* Scan all directories for parents that might point down to this
* inode. Skip the inode being repaired during the scan since it
* cannot be its own parent. Note that we still hold the AGI locked
* so there's a real possibility that _iscan_iter can return EBUSY.
*/
xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
if (S_ISDIR(VFS_I(dp)->i_mode))
error = xrep_dinode_findmode_walk_directory(ri, dp);
xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
xchk_irele(sc, dp);
if (error < 0)
break;
if (xchk_should_terminate(sc, &error))
break;
}
xchk_iscan_iter_finish(&ri->ftype_iscan);
xchk_iscan_teardown(&ri->ftype_iscan);
if (error == -EBUSY) {
if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
/*
* If we got an EBUSY after finding at least one
* dirent, that means the scan found an inode on the
* inactivation list and could not open it. Accept the
* alleged ftype and install a new mode below.
*/
error = 0;
} else if (!(sc->flags & XCHK_TRY_HARDER)) {
/*
* Otherwise, retry the operation one time to see if
* the reason for the delay is an inode from the same
* cluster buffer waiting on the inactivation list.
*/
error = -EDEADLOCK;
}
}
if (error)
return error;
/*
* Convert the discovered ftype into the file mode. If all else fails,
* return S_IFREG.
*/
switch (ri->alleged_ftype) {
case XFS_DIR3_FT_DIR:
*mode = S_IFDIR;
break;
case XFS_DIR3_FT_WHT:
case XFS_DIR3_FT_CHRDEV:
*mode = S_IFCHR;
break;
case XFS_DIR3_FT_BLKDEV:
*mode = S_IFBLK;
break;
case XFS_DIR3_FT_FIFO:
*mode = S_IFIFO;
break;
case XFS_DIR3_FT_SOCK:
*mode = S_IFSOCK;
break;
case XFS_DIR3_FT_SYMLINK:
*mode = S_IFLNK;
break;
default:
*mode = S_IFREG;
break;
}
return 0;
}
/* Turn di_mode into /something/ recognizable. Returns true if we succeed. */
STATIC int
xrep_dinode_mode( xrep_dinode_mode(
struct xrep_inode *ri, struct xrep_inode *ri,
struct xfs_dinode *dip) struct xfs_dinode *dip)
{ {
struct xfs_scrub *sc = ri->sc; struct xfs_scrub *sc = ri->sc;
uint16_t mode = be16_to_cpu(dip->di_mode); uint16_t mode = be16_to_cpu(dip->di_mode);
int error;
trace_xrep_dinode_mode(sc, dip); trace_xrep_dinode_mode(sc, dip);
if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN) if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
return; return 0;
/* Try to fix the mode. If we cannot, then leave everything alone. */
error = xrep_dinode_find_mode(ri, &mode);
switch (error) {
case -EINTR:
case -EBUSY:
case -EDEADLOCK:
/* temporary failure or fatal signal */
return error;
case 0:
/* found mode */
break;
default:
/* some other error, assume S_IFREG */
mode = S_IFREG;
break;
}
/* bad mode, so we set it to a file that only root can read */ /* bad mode, so we set it to a file that only root can read */
mode = S_IFREG;
dip->di_mode = cpu_to_be16(mode); dip->di_mode = cpu_to_be16(mode);
dip->di_uid = 0; dip->di_uid = 0;
dip->di_gid = 0; dip->di_gid = 0;
ri->zap_acls = true; ri->zap_acls = true;
return 0;
} }
/* Fix any conflicting flags that the verifiers complain about. */ /* Fix any conflicting flags that the verifiers complain about. */
...@@ -1107,12 +1320,15 @@ xrep_dinode_core( ...@@ -1107,12 +1320,15 @@ xrep_dinode_core(
/* Fix everything the verifier will complain about. */ /* Fix everything the verifier will complain about. */
dip = xfs_buf_offset(bp, ri->imap.im_boffset); dip = xfs_buf_offset(bp, ri->imap.im_boffset);
xrep_dinode_header(sc, dip); xrep_dinode_header(sc, dip);
xrep_dinode_mode(ri, dip); iget_error = xrep_dinode_mode(ri, dip);
if (iget_error)
goto write;
xrep_dinode_flags(sc, dip, ri->rt_extents > 0); xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
xrep_dinode_size(ri, dip); xrep_dinode_size(ri, dip);
xrep_dinode_extsize_hints(sc, dip); xrep_dinode_extsize_hints(sc, dip);
xrep_dinode_zap_forks(ri, dip); xrep_dinode_zap_forks(ri, dip);
write:
/* Write out the inode. */ /* Write out the inode. */
trace_xrep_dinode_fixed(sc, dip); trace_xrep_dinode_fixed(sc, dip);
xfs_dinode_calc_crc(sc->mp, dip); xfs_dinode_calc_crc(sc->mp, dip);
...@@ -1128,7 +1344,8 @@ xrep_dinode_core( ...@@ -1128,7 +1344,8 @@ xrep_dinode_core(
* accessing the inode. If iget fails, we still need to commit the * accessing the inode. If iget fails, we still need to commit the
* changes. * changes.
*/ */
iget_error = xchk_iget(sc, ino, &sc->ip); if (!iget_error)
iget_error = xchk_iget(sc, ino, &sc->ip);
if (!iget_error) if (!iget_error)
xchk_ilock(sc, XFS_IOLOCK_EXCL); xchk_ilock(sc, XFS_IOLOCK_EXCL);
...@@ -1496,6 +1713,13 @@ xrep_inode( ...@@ -1496,6 +1713,13 @@ xrep_inode(
ASSERT(ri != NULL); ASSERT(ri != NULL);
error = xrep_dinode_problems(ri); error = xrep_dinode_problems(ri);
if (error == -EBUSY) {
/*
* Directory scan to recover inode mode encountered a
* busy inode, so we did not continue repairing things.
*/
return 0;
}
if (error) if (error)
return error; return error;
......
...@@ -51,6 +51,32 @@ ...@@ -51,6 +51,32 @@
* scanner's observations must be updated. * scanner's observations must be updated.
*/ */
/*
* If the inobt record @rec covers @iscan->skip_ino, mark the inode free so
* that the scan ignores that inode.
*/
STATIC void
xchk_iscan_mask_skipino(
struct xchk_iscan *iscan,
struct xfs_perag *pag,
struct xfs_inobt_rec_incore *rec,
xfs_agino_t lastrecino)
{
struct xfs_scrub *sc = iscan->sc;
struct xfs_mount *mp = sc->mp;
xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
xfs_agnumber_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);
if (pag->pag_agno != skip_agno)
return;
if (skip_agino < rec->ir_startino)
return;
if (skip_agino > lastrecino)
return;
rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1);
}
/* /*
* Set *cursor to the next allocated inode after whatever it's set to now. * Set *cursor to the next allocated inode after whatever it's set to now.
* If there are no more inodes in this AG, cursor is set to NULLAGINO. * If there are no more inodes in this AG, cursor is set to NULLAGINO.
...@@ -127,6 +153,9 @@ xchk_iscan_find_next( ...@@ -127,6 +153,9 @@ xchk_iscan_find_next(
if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
continue; continue;
if (iscan->skip_ino)
xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);
/* /*
* If the incoming lookup put us in the middle of an inobt * If the incoming lookup put us in the middle of an inobt
* record, mark it and the previous inodes "free" so that the * record, mark it and the previous inodes "free" so that the
......
...@@ -22,6 +22,9 @@ struct xchk_iscan { ...@@ -22,6 +22,9 @@ struct xchk_iscan {
/* This is the inode that will be examined next. */ /* This is the inode that will be examined next. */
xfs_ino_t cursor_ino; xfs_ino_t cursor_ino;
/* If nonzero and non-NULL, skip this inode when scanning. */
xfs_ino_t skip_ino;
/* /*
* This is the last inode that we've successfully scanned, either * This is the last inode that we've successfully scanned, either
* because the caller scanned it, or we moved the cursor past an empty * because the caller scanned it, or we moved the cursor past an empty
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "xfs_rtbitmap.h" #include "xfs_rtbitmap.h"
#include "xfs_quota.h" #include "xfs_quota.h"
#include "xfs_quota_defs.h" #include "xfs_quota_defs.h"
#include "xfs_da_format.h"
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/xfile.h" #include "scrub/xfile.h"
#include "scrub/xfarray.h" #include "scrub/xfarray.h"
......
...@@ -1817,6 +1817,55 @@ TRACE_EVENT(xrep_dinode_count_rmaps, ...@@ -1817,6 +1817,55 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
__entry->attr_extents) __entry->attr_extents)
); );
TRACE_EVENT(xrep_dinode_findmode_dirent,
TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *dp,
unsigned int ftype),
TP_ARGS(sc, dp, ftype),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_ino_t, parent_ino)
__field(unsigned int, ftype)
),
TP_fast_assign(
__entry->dev = sc->mp->m_super->s_dev;
__entry->ino = sc->sm->sm_ino;
__entry->parent_ino = dp->i_ino;
__entry->ftype = ftype;
),
TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx ftype '%s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->parent_ino,
__print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR))
);
TRACE_EVENT(xrep_dinode_findmode_dirent_inval,
TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *dp,
unsigned int ftype, unsigned int found_ftype),
TP_ARGS(sc, dp, ftype, found_ftype),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_ino_t, parent_ino)
__field(unsigned int, ftype)
__field(unsigned int, found_ftype)
),
TP_fast_assign(
__entry->dev = sc->mp->m_super->s_dev;
__entry->ino = sc->sm->sm_ino;
__entry->parent_ino = dp->i_ino;
__entry->ftype = ftype;
__entry->found_ftype = found_ftype;
),
TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx ftype '%s' found_ftype '%s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->parent_ino,
__print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR),
__print_symbolic(__entry->found_ftype, XFS_DIR3_FTYPE_STR))
);
TRACE_EVENT(xrep_cow_mark_file_range, TRACE_EVENT(xrep_cow_mark_file_range,
TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t startblock, TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t startblock,
xfs_fileoff_t startoff, xfs_filblks_t blockcount), xfs_fileoff_t startoff, xfs_filblks_t blockcount),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment